Analysis of the Broad Institute’s Avana CRISPR screen (Broad Institute Cancer Dependency Map 2018, Meyers et al. 2017) and the Broad Institute and Dana-Farber Cancer Institute’s Achilles shRNA screen (McFarland et al. 2018, Data Science 2018).
The Avana screen produced results using CERES (Meyers et al. 2017) (GitHub), which generates gene dependency scores from sgRNA depletion scores from gene essentiality screens and eliminates bias arising from the effect of copy number variation on Cas9 DNA cleavage. The lower the CERES score, the higher the likelihood that the gene is essential in the associated cell line. Scores are scaled per cell line such that a score of 0 is the median effect of nonessential genes and -1 is the median effect of common core essential genes.
In a previous version of the shRNA screen, DEMETER (GitHub) was used to compute a dependency score for each gene by using the depletion values from each shRNA to infer the effect of target knockdown (on-target) and of expressing a given miRNA seed (off-target) in each cell line. More negative values indicate increased dependency, while more positive values indicate lower dependency. Zero represents the average dependency across all cell lines.
In the new data release, DEMETER2 (GitHub repository), developed by McFarland et al. (2018), was used to analyze the Achilles screen. DEMETER2 expands on DEMETER by including parameters for cell-line-specific screen effects and noisy data, correcting for global differences in shRNA levels across cell lines, pooling data across cell lines via hierarchical modeling, and using Bayesian inference to compute uncertainty estimates. Now, a score of zero represents no dependency.
All annotation files (copy number, mutation status, and gene expression) (Consortium and Consortium 2015, Barretina et al. 2012) were downloaded from the DepMap Data Portal.
Load libraries.
library(NMF)
library(ggpubr)
library(ggsignif)
library(rowr)
library(data.table)
library(CePa)
library(plyr)
library(tidyverse)
library(magrittr)
library(matrixStats)
library(parallel)
library(kableExtra)
library(gridExtra)
library(broom)
library(glmnet)
library(devtools)
library(reshape2)
library(caret)
library(bsselectR)
library(ComplexHeatmap)
library(circlize)
# Adds significance codes for adjusted p-values and short p-value labels.
#
# df: data frame with columns p, p.adj and p.signif (the layout produced by
#     compare_means).
# Returns df with:
#   p.signif     - "ns" replaced by NA, always character
#   p.signif.adj - "****" / "***" / "**" / "*" for p.adj <= 0.0001 / 0.001 /
#                  0.01 / 0.05, NA otherwise, always character
#   p.short, p.adj.short - p and p.adj formatted to 2 significant digits
adj_signif <- function(df) {
  # Convert to character first: ifelse() on a factor would hand back the
  # integer level codes instead of the labels.
  signif_codes <- as.character(df$p.signif)
  signif_codes[!is.na(signif_codes) & signif_codes == "ns"] <- NA_character_
  df$p.signif <- signif_codes

  # Inclusive upper bounds of each significance band, most significant first.
  # left.open = TRUE makes the bands (lower, upper], i.e. "p.adj <= upper".
  cutoffs <- c(0.0001, 0.001, 0.01, 0.05)
  band_labels <- c("****", "***", "**", "*", NA_character_)
  band <- findInterval(df$p.adj, cutoffs, left.open = TRUE) + 1L
  # Indexing a character vector keeps the column character even when every
  # entry is NA (nested ifelse() would return a logical column in that case).
  df$p.signif.adj <- band_labels[band]

  df$p.short <- formatC(df$p, format = "g", digits = 2)
  df$p.adj.short <- formatC(df$p.adj, format = "g", digits = 2)
  df
}
# Builds the four-panel CERES summary figure for one gene.
#
# Panels: (A) score by primary tissue, (B) score by mutation status with the
# Wilcoxon p-values, (C) score vs. copy number, (D) score vs. log2(RPKM).
#
# Reads two objects from the enclosing environment instead of taking them as
# arguments:
#   crispr_data   - columns used: Hugo_Symbol, Score, Mutation_Status, Color,
#                   Copy_Number, RPKM_log2, primary_tissue
#   crispr_signif - columns used: Hugo_Symbol, p.short, p.adj.short
# NOTE(review): confirm crispr_data carries plain `Color` and
# `Mutation_Status` columns when this is called.
#
# g: gene symbol matched against Hugo_Symbol.
# Returns the arranged figure produced by ggarrange().
makeCRISPRgrob <- function(g) {
  gene <- filter(crispr_data, Hugo_Symbol == g)
  sig <- filter(crispr_signif, Hugo_Symbol == g)

  # Colour lookup keyed by mutation status, used by every panel
  crispr_color <- as.character(gene$Color)
  names(crispr_color) <- gene$Mutation_Status

  # x positions for the two per-group correlation labels: 25% and 75% of the
  # observed range. The midpoint is (lo + hi) / 2; using the half-range
  # (hi - lo) / 2 pushes labels off the data whenever lo is not 0.
  quarter_positions <- function(x) {
    rng <- range(x, na.rm = TRUE)
    midpoint <- (rng[1] + rng[2]) / 2
    c((rng[1] + midpoint) / 2, (rng[2] + midpoint) / 2)
  }
  # Labels sit at the top of the score range; ignore missing scores
  top_score <- max(gene$Score, na.rm = TRUE)

  # Mutation status
  plot_mut <- ggplot(data = gene, aes(x = Mutation_Status, y = Score, color = Mutation_Status)) +
    geom_boxplot(outlier.shape = NA) +
    geom_jitter(alpha = 0.3, size = 0.7, position = position_jitter(w = 0.05)) +
    scale_color_manual(values = crispr_color) +
    geom_hline(yintercept = 0, linetype = 2, lwd = 0.3) +
    theme_light() +
    theme(legend.position = "none") +
    labs(x = paste0(g, " Mutation Status"), y = "CERES Score",
         title = "Mutation Status",
         subtitle = paste0("Wilcoxon test:\n- BH-corrected p-value: ", sig$p.adj.short, "\n- Uncorrected p-value: ", sig$p.short, "\nMutant lines harbor deleterious mutations."))

  # Copy number
  plot_cn <- ggplot(data = gene, aes(x = Copy_Number, y = Score, color = Mutation_Status)) +
    geom_point(size = 0.5, alpha = 0.5) +
    geom_smooth(method = "lm", size = 0.5) +
    geom_hline(yintercept = 0, linetype = 2, lwd = 0.3) +
    scale_color_manual(values = crispr_color) +
    stat_cor(method = "pearson", show.legend = FALSE, label.x = quarter_positions(gene$Copy_Number), label.y = top_score) +
    theme(legend.position = "none") +
    labs(y = "CERES Score", color = "Mutation Status",
         x = paste0(g, " Theoretical Copy number"), title = "Copy Number",
         subtitle = "r: Pearson correlation coeffcient")

  # Gene expression
  plot_ge <- ggplot(data = gene, aes(x = RPKM_log2, y = Score, color = Mutation_Status)) +
    geom_point(size = 0.5, alpha = 0.5) +
    geom_smooth(method = "lm", size = 0.5) +
    geom_hline(yintercept = 0, linetype = 2, lwd = 0.3) +
    scale_color_manual(values = crispr_color) +
    stat_cor(method = "pearson", show.legend = FALSE, label.x = quarter_positions(gene$RPKM_log2), label.y = top_score) +
    theme(legend.position = "none") +
    labs(y = "CERES Score", color = "Mutation Status",
         x = paste0(g, " Gene Expression [log2(RPKM)]"),
         title = "Gene Expression",
         subtitle = "r: Pearson correlation coeffcient")

  # Cell line lineage
  plot_tissue <- ggplot(data = gene, aes(x = primary_tissue, y = Score, color = Mutation_Status)) +
    geom_point(alpha = 0.5) +
    scale_color_manual(values = crispr_color) +
    geom_hline(yintercept = 0, linetype = 2, lwd = 0.3) +
    # Repeat the score axis on the opposite side of the flipped plot
    scale_y_continuous(sec.axis = sec_axis(~ .)) +
    coord_flip() +
    theme(legend.position = "none") +
    labs(y = "CERES Score", x = "Primary Tissue",
         color = "Mutation Status", title = g)

  # Arrange plots: tissue panel on the left, the three others stacked on the right
  plot <- ggarrange(ggarrange(plot_tissue, nrow = 1, labels = c("A")),
                    ggarrange(plot_mut, plot_cn, plot_ge, nrow = 3,
                              labels = c("B", "C", "D"), heights = c(2, 3, 3)),
                    font.label = list(size = 30, face = "bold"),
                    nrow = 1, ncol = 2, widths = c(3, 2))
  plot
}
# Plots CERES scores for one gene, split by general cell line lineage, with
# the per-lineage p-values printed above each lineage.
#
# Reads from the enclosing environment:
#   crispr_signif_lineage - columns used: Hugo_Symbol,
#                           group_general_lineage_name, p, p.short, p.adj.short
#   crispr_data           - columns used: Hugo_Symbol,
#                           group_general_lineage_name, Score, Color,
#                           Mutation_Status
# NOTE(review): confirm crispr_data carries plain `Color` and
# `Mutation_Status` columns when this is called.
#
# g: gene symbol matched against Hugo_Symbol.
# Returns a ggplot object, or NULL when there is nothing to plot for g.
makeCRISPRlinplot <- function(g) {
  sig <- filter(crispr_signif_lineage, Hugo_Symbol == g)
  if (nrow(sig) == 0) {
    return(NULL)
  }

  gene <- filter(crispr_data, Hugo_Symbol == g)
  # Inner join: keeps only lineages that have a test result for this gene
  gene <- merge(gene, sig, by = c("Hugo_Symbol", "group_general_lineage_name"))
  if (nrow(gene) == 0 || all(is.na(gene$Score))) {
    return(NULL)
  }
  # Order lineages on the x axis by their p-value
  gene <- mutate(gene, group_general_lineage_name = reorder(group_general_lineage_name, p, mean))

  crispr_color <- as.character(gene$Color)
  names(crispr_color) <- gene$Mutation_Status

  # Pick the rounding step for the upper y limit from the spread of scores
  score_limits <- range(gene$Score, na.rm = TRUE)
  score_range <- abs(score_limits[2] - score_limits[1])
  if (score_range <= 2) {
    round_accuracy <- 0.25
  } else if (score_range <= 3) {
    round_accuracy <- 0.5
  } else {
    round_accuracy <- 1.0
  }
  # Round the top limit up so the p-value labels have headroom
  upper_limit <- round_any(x = score_limits[2], accuracy = round_accuracy, f = ceiling)
  label_height <- score_limits[2]

  plot <- ggplot(data = gene, aes(x = group_general_lineage_name, y = Score)) +
    geom_point(alpha = 0.5, mapping = aes(color = Mutation_Status)) +
    scale_color_manual(values = crispr_color) +
    coord_cartesian(ylim = c(score_limits[1], upper_limit)) +
    # size = 0 and tip_length = 0 hide the bracket so only the text is drawn
    geom_signif(data = gene, mapping = aes(xmin = group_general_lineage_name, xmax = group_general_lineage_name, annotations = paste(p.short, "\n", p.adj.short), y_position = label_height), manual = TRUE, tip_length = 0, size = 0, textsize = 3) +
    theme(legend.position = "top", axis.text.x = element_text(angle = 70, hjust = 1, size = 10)) +
    labs(y = "CERES Score",
         x = "Lineage",
         color = "Mutation Status",
         title = paste0(g, ": CERES score by cell line lineage"),
         subtitle = "Lineages sorted by increasing p-value; labels indicate unadjusted p-value / BH-corrected p-value.\nMutant lines harbor deleterious mutations.")
  plot
}
# Plots CERES scores for one gene, split by primary tissue, with the
# per-tissue p-values printed above each tissue.
#
# Reads from the enclosing environment:
#   crispr_signif_tissue - columns used: Hugo_Symbol, primary_tissue, p,
#                          p.short, p.adj.short
#   crispr_data          - columns used: Hugo_Symbol, primary_tissue, Score,
#                          Color, Mutation_Status
# NOTE(review): confirm crispr_data carries plain `Color` and
# `Mutation_Status` columns when this is called.
#
# g: gene symbol matched against Hugo_Symbol.
# Returns a ggplot object, or NULL when there is nothing to plot for g.
makeCRISPRtissueplot <- function(g) {
  sig <- filter(crispr_signif_tissue, Hugo_Symbol == g)
  if (nrow(sig) == 0) {
    return(NULL)
  }

  gene <- filter(crispr_data, Hugo_Symbol == g)
  # Inner join: keeps only tissues that have a test result for this gene
  gene <- merge(gene, sig, by = c("Hugo_Symbol", "primary_tissue"))
  if (nrow(gene) == 0 || all(is.na(gene$Score))) {
    return(NULL)
  }
  # Order tissues on the x axis by their p-value
  gene <- mutate(gene, primary_tissue = reorder(primary_tissue, p, mean))

  crispr_color <- as.character(gene$Color)
  names(crispr_color) <- gene$Mutation_Status

  # Pick the rounding step for the upper y limit from the spread of scores
  score_limits <- range(gene$Score, na.rm = TRUE)
  score_range <- abs(score_limits[2] - score_limits[1])
  if (score_range <= 2) {
    round_accuracy <- 0.25
  } else if (score_range <= 3) {
    round_accuracy <- 0.5
  } else {
    round_accuracy <- 1.0
  }
  # Round the top limit up so the p-value labels have headroom
  upper_limit <- round_any(x = score_limits[2], accuracy = round_accuracy, f = ceiling)
  label_height <- score_limits[2]

  plot <- ggplot(data = gene, aes(x = primary_tissue, y = Score)) +
    geom_point(alpha = 0.5, mapping = aes(color = Mutation_Status)) +
    scale_color_manual(values = crispr_color) +
    coord_cartesian(ylim = c(score_limits[1], upper_limit)) +
    # size = 0 and tip_length = 0 hide the bracket so only the text is drawn
    geom_signif(data = gene, mapping = aes(xmin = primary_tissue, xmax = primary_tissue, annotations = paste(p.short, "\n", p.adj.short), y_position = label_height), manual = TRUE, tip_length = 0, size = 0, textsize = 3) +
    theme(legend.position = "top", axis.text.x = element_text(angle = 70, hjust = 1, size = 10)) +
    labs(y = "CERES Score",
         x = "Primary Tissue",
         color = "Mutation Status",
         title = paste0(g, ": CERES score by cell line primary tissue"),
         subtitle = "Primary tissues sorted by increasing p-value; labels indicate unadjusted p-value / BH-corrected p-value.\nMutant lines harbor deleterious mutations.")
  plot
}
This comprehensive cancer cell line information was curated by Daniel Charytonowicz.
# Curated cell line table (tab-separated); its first column becomes the row names
ccl_converter <- read.delim("./data_munging/cell_line_database_v1_20180911.tsv", row.names = 1)
# DepMap cell line annotations; empty fields are read as NA
ccl_info <- read.csv("./data_munging/DepMap-2018q3-celllines.csv", na.strings = c("", NA))
# Strip backslashes from the disease names, then restore the apostrophe in "Ewing's"
ccl_info$Primary.Disease <- gsub("Ewings", "Ewing's", gsub("\\\\", "", ccl_info$Primary.Disease))
# CRISPR screen sample metadata (from figshare); empty fields are read as NA
crispr_meta <- read.csv("./data_munging/sample_info_18Q3_crispr.csv", na.strings = c("", NA))
# The 7th column is renamed so it can serve as a merge key with the other tables
names(crispr_meta)[7] <- "CCLE_Name"
Select genes from the Cancer Gene Census (CGC). The list was pulled from the International Cancer Genome Consortium (ICGC) data portal (Advanced Search > Genes > Curated Gene Set > Cancer Gene Census).
# Cancer Gene Census genes: the second column of the headerless TSV is stored
# as Hugo_Symbol. tibble() replaces the deprecated data_frame() constructor.
cgc <- tibble("Hugo_Symbol" = read.delim("./data_munging/gene-ids-for-set-Cancer Gene Census.tsv", header = FALSE, sep = "\t")[, 2])
For mutation calling, get paired gene name and cell line fields in a data frame. For this analysis, we don’t care about how many mutations there are per gene or what type of mutations there are, so I didn’t save more information. I took unique gene-cell line combinations since we only cared about mutation presence/absence. Add a Mutation_Status column denoting all entries in MAF files as mutations present in the associated cell lines.
# Read the CCLE/DepMap 18Q3 mutation annotation file (tab-separated, with header)
maf_raw <- read.delim("./data_munging/CCLE_DepMap_18q3_maf_20180718.txt.gz")
# Keep only mutations from cell lines listed in the CRISPR screen metadata
maf_raw <- maf_raw %>% filter(Broad_ID %in% unique(crispr_meta$Broad_ID))
names(maf_raw)[names(maf_raw) == "Tumor_Sample_Barcode"] <- "CCLE_Name"
# Working table: the columns needed to classify each mutation
maf_df <- maf_raw[, c("Hugo_Symbol", "CCLE_Name", "Broad_ID", "Variant_Classification", "isDeleterious", "Reference_Allele")]
# Var_Length is the character length of the reference allele.
# Three alternative "Mutant" definitions follow: deleterious only,
# anything not silent, and deleterious or missense.
maf_df <- maf_df %>%
  mutate(
    Var_Length = nchar(as.character(Reference_Allele)),
    Mutation_Status_Deleterious = ifelse(isDeleterious == TRUE, "Mutant", "Other"),
    Mutation_Status_Nonsilent = ifelse(Variant_Classification == "Silent", "Other", "Mutant"),
    Mutation_Status_DeleteriousMissense = ifelse(isDeleterious == TRUE | Variant_Classification == "Missense_Mutation", "Mutant", "Other")
  )
# Drop exact duplicate rows.
# NOTE(review): unique() compares whole rows, so a gene / cell line pair whose
# mutations differ in status or Var_Length still keeps several rows here;
# confirm the downstream merge expects that.
maf_df <- unique(maf_df[, c("Hugo_Symbol", "CCLE_Name", "Broad_ID", "Mutation_Status_Deleterious", "Mutation_Status_Nonsilent", "Mutation_Status_DeleteriousMissense", "Var_Length")])
# Whole-MAF summary: counts per variant classification and deleterious flag
maf_summ <- maf_raw %>%
  group_by(Variant_Classification, isDeleterious) %>%
  tally()
# Share of all counted mutations, as a percentage
maf_summ$Percent <- format(round(100 * maf_summ$n / sum(maf_summ$n), 4), nsmall = 2)
maf_summ$isDeleterious <- ifelse(maf_summ$isDeleterious == "TRUE", "Yes", "No")
maf_summ %>%
  knitr::kable(caption = "Distribution of variant classifications in the MAF file") %>%
  kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive"))
| Variant_Classification | isDeleterious | n | Percent |
|---|---|---|---|
| 3’UTR | No | 1 | 0.0003 |
| 3’UTR | Yes | 30 | 0.0084 |
| 5’Flank | No | 8 | 0.0022 |
| 5’Flank | Yes | 34 | 0.0095 |
| 5’UTR | No | 2 | 0.0006 |
| 5’UTR | Yes | 15 | 0.0042 |
| De_novo_Start_OutOfFrame | Yes | 2006 | 0.5595 |
| Frame_Shift_Del | Yes | 12441 | 3.4698 |
| Frame_Shift_Ins | Yes | 9215 | 2.5701 |
| IGR | No | 6 | 0.0017 |
| IGR | Yes | 44 | 0.0123 |
| In_Frame_Del | No | 1747 | 0.4872 |
| In_Frame_Ins | No | 442 | 0.1233 |
| Intron | No | 9 | 0.0025 |
| Intron | Yes | 187 | 0.0522 |
| Missense_Mutation | No | 211359 | 58.9484 |
| Nonsense_Mutation | Yes | 12711 | 3.5451 |
| Nonstop_Mutation | Yes | 335 | 0.0934 |
| Silent | No | 94225 | 26.2795 |
| Splice_Site | Yes | 13180 | 3.6759 |
| Start_Codon_Del | Yes | 25 | 0.0070 |
| Start_Codon_Ins | Yes | 33 | 0.0092 |
| Start_Codon_SNP | No | 402 | 0.1121 |
| Stop_Codon_Del | No | 1 | 0.0003 |
| Stop_Codon_Del | Yes | 52 | 0.0145 |
| Stop_Codon_Ins | Yes | 39 | 0.0109 |
# Longest reference allele (in characters) among the retained mutations
max(nchar(as.character(maf_raw$Reference_Allele)))
# Mutation counts by variant type and TCGA hotspot flag
tally(group_by(maf_raw, Variant_Type, isTCGAhotspot))
# Spot check on a single gene: how many KRAS entries are flagged deleterious
test <- maf_raw %>% filter(Hugo_Symbol == "KRAS")
tally(group_by(test, isDeleterious))
# Gene-level copy number matrix; column names are kept exactly as in the file
cn <- read.csv("./data_munging/public_18Q3_gene_cn.csv.gz", check.names = FALSE)
# Back-transform every column after the first from log2(CN/2) to CN: 2 * 2^x
cn[-1] <- lapply(cn[-1], function(log_ratio) 2 * (2 ^ log_ratio))
# Keep only the text before the first space in each column name (drops the Entrez IDs)
names(cn) <- gsub(" .*", "", names(cn))
names(cn)[1] <- "Broad_ID"
# Long format: one row per cell line / gene pair
cn_melt <- melt(data = cn, id.vars = "Broad_ID", measure.vars = names(cn)[-1], variable.name = "Hugo_Symbol", value.name = "Copy_Number")
# Cache the long table on disk and read it back
saveRDS(cn_melt, "./data_munging/rds/cn_melt_18Q3.rds", compress = "xz")
cn_melt <- readRDS("./data_munging/rds/cn_melt_18Q3.rds")
Gene expression data (Reads Per Kilobase of transcript, per Million mapped reads, RPKM).
# Read the RPKM matrix from the GCT file; the first two lines are skipped
# and column names are kept exactly as in the file
ge <- read.delim("./../crispr_lineages_giant_files/CCLE_DepMap_18q3_RNAseq_RPKM_20180718.gct.gz", skip = 2, header = TRUE, sep = "\t", check.names = FALSE)
# Edit columns: drop Name, then label the column that is now first as Hugo_Symbol
ge$Name <- NULL
colnames(ge)[1] <- "Hugo_Symbol"
# Melt to long format: one row per gene / sample column, value stored as RPKM
ge_melt <- melt(data = ge, id.vars = "Hugo_Symbol", measure.vars = colnames(ge[2:ncol(ge)]), value.name = "RPKM")
## Split the melted `variable` column at the space into CCLE_Name and Broad_ID
ge_melt <- with(ge_melt, cbind(Hugo_Symbol, colsplit(variable, pattern = " ", names = c("CCLE_Name", "Broad_ID")), RPKM))
## Remove parentheses around Broad IDs
ge_melt$Broad_ID <- gsub("\\(|\\)", "", ge_melt$Broad_ID)
# Cache the long table on disk and read it back
saveRDS(ge_melt, "./../crispr_lineages_giant_files/ge_melt_18Q3.rds", compress = "xz")
ge_melt <- readRDS("./../crispr_lineages_giant_files/ge_melt_18Q3.rds")
# Gene dependency matrix; column names are kept exactly as in the file
dep <- read.csv("./data_munging/gene_dependency_18Q3.csv.gz", check.names = FALSE)
# Keep only the text before the first space in each column name (drops the Entrez IDs)
names(dep) <- gsub(" .*", "", names(dep))
The latest CRISPR CERES score data (18Q3, August 2018) was pulled from the DepMap Data Portal (Broad Institute Cancer Dependency Map 2018, Meyers et al. 2017).
# CERES gene effect matrix; column names are kept exactly as in the file
crispr <- read.csv("./data_munging/gene_effect_18Q3.csv.gz", check.names = FALSE)
# Keep only the text before the first space in each column name (drops the Entrez IDs)
names(crispr) <- gsub(" .*", "", names(crispr))
Merge annotation data:
# Long format of the CERES scores: one row per cell line / gene pair
crispr_melt <- melt(crispr, id.vars = "Broad_ID", measure.vars = names(crispr)[-1], variable.name = "Hugo_Symbol", value.name = "Score")
# Long format of the dependency probabilities, same layout
dep_melt <- melt(dep, id.vars = "Broad_ID", measure.vars = names(dep)[-1], variable.name = "Hugo_Symbol", value.name = "Dep_Prob")
# Left-join, in turn: dependency probabilities, cell line annotations,
# CRISPR sample metadata. Every score row is kept at each step.
crispr_melt <- crispr_melt %>%
  merge(dep_melt, by = c("Broad_ID", "Hugo_Symbol"), all.x = TRUE) %>%
  merge(ccl_info, by = "Broad_ID", all.x = TRUE) %>%
  merge(crispr_meta, by = c("CCLE_Name", "Broad_ID"), all.x = TRUE)
# Attach mutation calls; pairs without a MAF entry get NA status columns
crispr_muts <- crispr_melt %>%
  merge(maf_df, by = c("Hugo_Symbol", "CCLE_Name", "Broad_ID"), all.x = TRUE)
crispr_muts$Hugo_Symbol <- factor(crispr_muts$Hugo_Symbol)
# No MAF entry means no recorded mutation: recode NA status as "Other"
crispr_muts <- crispr_muts %>%
  mutate(
    Mutation_Status_Deleterious = if_else(is.na(Mutation_Status_Deleterious), "Other", Mutation_Status_Deleterious),
    Mutation_Status_DeleteriousMissense = if_else(is.na(Mutation_Status_DeleteriousMissense), "Other", Mutation_Status_DeleteriousMissense),
    Mutation_Status_Nonsilent = if_else(is.na(Mutation_Status_Nonsilent), "Other", Mutation_Status_Nonsilent)
  )
# Per gene, count the rows labelled "Other" and "Mutant" under each of the
# three mutation-status definitions
crispr_muts_summ <- crispr_muts %>%
  group_by(Hugo_Symbol) %>%
  summarize(
    N_Deleterious_Other = sum(Mutation_Status_Deleterious == "Other"),
    N_Deleterious_Mutant = sum(Mutation_Status_Deleterious == "Mutant"),
    N_DeleteriousMissense_Other = sum(Mutation_Status_DeleteriousMissense == "Other"),
    N_DeleteriousMissense_Mutant = sum(Mutation_Status_DeleteriousMissense == "Mutant"),
    N_Nonsilent_Other = sum(Mutation_Status_Nonsilent == "Other"),
    N_Nonsilent_Mutant = sum(Mutation_Status_Nonsilent == "Mutant")
  )
# Join the per-gene counts back onto the full table so every row carries them
crispr_data <- merge(crispr_muts_summ, crispr_muts, by = "Hugo_Symbol")
# Plot colour per row and status definition: "cyan3" for Other, "darkorchid" otherwise
crispr_data$Color_Deleterious <- factor(ifelse(crispr_data$Mutation_Status_Deleterious == "Other", "cyan3", "darkorchid"))
crispr_data$Color_DeleteriousMissense <- factor(ifelse(crispr_data$Mutation_Status_DeleteriousMissense == "Other", "cyan3", "darkorchid"))
crispr_data$Color_Nonsilent <- factor(ifelse(crispr_data$Mutation_Status_Nonsilent == "Other", "cyan3", "darkorchid"))
# Cell line lineages: left-join the curated cell line table
crispr_data <- merge(crispr_data, ccl_converter, by = c("CCLE_Name", "Broad_ID"), all.x = TRUE)
# Put the lineage levels in reverse alphabetical order. The factor is rebuilt
# with an explicit `levels` argument because assigning to levels() renames the
# existing levels in place, which would relabel the rows instead of only
# changing the order.
crispr_data$lineage_name <- as.factor(crispr_data$lineage_name)
crispr_data$lineage_name <- factor(crispr_data$lineage_name, levels = sort(levels(crispr_data$lineage_name), decreasing = TRUE))
# Copy number: left-join per gene and cell line
crispr_data <- crispr_data %>%
  merge(cn_melt, by = c("Hugo_Symbol", "Broad_ID"), all.x = TRUE)
# Gene expression (RPKM): restrict to genes present in the CRISPR table, then left-join
ge_filt <- ge_melt %>% filter(Hugo_Symbol %in% unique(crispr_data$Hugo_Symbol))
crispr_data <- crispr_data %>%
  merge(ge_filt, by = c("Hugo_Symbol", "Broad_ID", "CCLE_Name"), all.x = TRUE)
# The 0.0001 offset keeps log2 finite when RPKM is 0
crispr_data$RPKM_log2 <- log2(0.0001 + crispr_data$RPKM)
# Cache the fully annotated table on disk and read it back
saveRDS(crispr_data, "./../crispr_lineages_giant_files/crispr_data_18Q3.rds", compress = "xz")
crispr_data <- readRDS("./../crispr_lineages_giant_files/crispr_data_18Q3.rds")
# One Broad_ID per row of crispr_data (not deduplicated)
crispr_ccl <- data.frame("Broad_ID" = crispr_data$Broad_ID)
# write.table(crispr_data, file = "~/Desktop/crispr_data.tsv", quote = FALSE, sep = "\t")
Filter for point mutations:
# Keep rows whose reference allele is one character long, plus rows with no
# MAF entry (Var_Length is NA)
crispr_data_ptmuts <- crispr_data %>% filter(Var_Length == 1 | is.na(Var_Length))
# Spot checks: rows lacking a general lineage, then the rows for KRAS
test_ptmuts <- crispr_data_ptmuts %>% filter(is.na(group_general_lineage_name))
test_ptmuts <- crispr_data_ptmuts %>% filter(Hugo_Symbol == "KRAS")
# Per-gene Wilcoxon test of score by deleterious mutation status, with BH
# adjustment, followed by the significance-code columns from adj_signif()
crispr_signif_del <- compare_means(Score ~ Mutation_Status_Deleterious, group.by = c("Hugo_Symbol"), data = crispr_data_ptmuts, method = "wilcox.test", p.adjust.method = "BH") %>%
  adj_signif()
# Sort by increasing unadjusted p-value
crispr_signif_del <- crispr_signif_del[order(crispr_signif_del$p), ]
# Cache the results on disk and read them back
saveRDS(crispr_signif_del, "./data_munging/rds/crispr_signif_ptmuts_deleterious_gene.rds")
# write.table(crispr_signif_del, file = "~/Desktop/crispr_signif_ptmuts_deleterious_gene.csv", quote = FALSE, sep = ",", row.names = FALSE)
crispr_signif_del <- readRDS("./data_munging/rds/crispr_signif_ptmuts_deleterious_gene.rds")
# Scrollable table of the genes with unadjusted p < 0.01
knitr::kable(
  filter(crispr_signif_del, p < 0.01)[, c("Hugo_Symbol", "p", "p.adj", "p.format", "p.signif", "p.signif.adj")],
  caption = "Wilcoxon test results comparing deleterious mutant vs other cell lines, p < 0.01 (BH-adjusted p-values: * p <= 0.05, ** p <= 0.01, *** p <= 0.001, **** p <= 0.0001)"
) %>%
  kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive")) %>%
  scroll_box(width = "900px", height = "450px")
| Hugo_Symbol | p | p.adj | p.format | p.signif | p.signif.adj |
|---|---|---|---|---|---|
| PTEN | 0.0000000 | 0.0000000 | 1.5e-12 | **** | **** |
| TP53 | 0.0000008 | 0.0050305 | 7.7e-07 | **** | ** |
| ARID1A | 0.0000061 | 0.0265330 | 6.1e-06 | **** | * |
| UTP20 | 0.0000785 | 0.2560978 | 7.8e-05 | **** | NA |
| VHL | 0.0001077 | 0.2812367 | 0.00011 | *** | NA |
| ARID1B | 0.0003780 | 0.7461832 | 0.00038 | *** | NA |
| ZC3H13 | 0.0009610 | 0.7461832 | 0.00096 | *** | NA |
| ABCD4 | 0.0012457 | 0.7461832 | 0.00125 | ** | NA |
| RB1 | 0.0016928 | 0.7461832 | 0.00169 | ** | NA |
| RNF208 | 0.0019385 | 0.7461832 | 0.00194 | ** | NA |
| DENND1A | 0.0020308 | 0.7461832 | 0.00203 | ** | NA |
| AJAP1 | 0.0021075 | 0.7461832 | 0.00211 | ** | NA |
| HAX1 | 0.0022315 | 0.7461832 | 0.00223 | ** | NA |
| NIPBL | 0.0022374 | 0.7461832 | 0.00224 | ** | NA |
| TIMD4 | 0.0022527 | 0.7461832 | 0.00225 | ** | NA |
| ULK1 | 0.0023004 | 0.7461832 | 0.00230 | ** | NA |
| PIGR | 0.0028348 | 0.7461832 | 0.00283 | ** | NA |
| TP53BP2 | 0.0029604 | 0.7461832 | 0.00296 | ** | NA |
| KLHL17 | 0.0038240 | 0.7461832 | 0.00382 | ** | NA |
| TUBGCP5 | 0.0039141 | 0.7461832 | 0.00391 | ** | NA |
| FMN1 | 0.0039763 | 0.7461832 | 0.00398 | ** | NA |
| AKAP13 | 0.0040426 | 0.7461832 | 0.00404 | ** | NA |
| PIK3R1 | 0.0040474 | 0.7461832 | 0.00405 | ** | NA |
| UGT2A3 | 0.0041569 | 0.7461832 | 0.00416 | ** | NA |
| ATAD5 | 0.0045229 | 0.7461832 | 0.00452 | ** | NA |
| TCERG1 | 0.0045495 | 0.7461832 | 0.00455 | ** | NA |
| KRT19 | 0.0046981 | 0.7461832 | 0.00470 | ** | NA |
| PPIL4 | 0.0047926 | 0.7461832 | 0.00479 | ** | NA |
| CR1 | 0.0048029 | 0.7461832 | 0.00480 | ** | NA |
| KIAA1731 | 0.0048110 | 0.7461832 | 0.00481 | ** | NA |
| RINT1 | 0.0048906 | 0.7461832 | 0.00489 | ** | NA |
| SIN3A | 0.0049114 | 0.7461832 | 0.00491 | ** | NA |
| DLD | 0.0053771 | 0.7461832 | 0.00538 | ** | NA |
| NPSR1 | 0.0054137 | 0.7461832 | 0.00541 | ** | NA |
| PTPRR | 0.0055220 | 0.7461832 | 0.00552 | ** | NA |
| CSTF3 | 0.0055891 | 0.7461832 | 0.00559 | ** | NA |
| GSTM5 | 0.0058384 | 0.7461832 | 0.00584 | ** | NA |
| OLFML2B | 0.0058575 | 0.7461832 | 0.00586 | ** | NA |
| ARID5B | 0.0059256 | 0.7461832 | 0.00593 | ** | NA |
| TDRD7 | 0.0061289 | 0.7461832 | 0.00613 | ** | NA |
| NOC4L | 0.0061581 | 0.7461832 | 0.00616 | ** | NA |
| KIAA1107 | 0.0061991 | 0.7461832 | 0.00620 | ** | NA |
| SLC28A2 | 0.0062191 | 0.7461832 | 0.00622 | ** | NA |
| PLXNA1 | 0.0062851 | 0.7461832 | 0.00629 | ** | NA |
| PLOD3 | 0.0062880 | 0.7461832 | 0.00629 | ** | NA |
| ZNF439 | 0.0063457 | 0.7461832 | 0.00635 | ** | NA |
| DMKN | 0.0063484 | 0.7461832 | 0.00635 | ** | NA |
| EDNRB | 0.0063517 | 0.7461832 | 0.00635 | ** | NA |
| UTP3 | 0.0065359 | 0.7461832 | 0.00654 | ** | NA |
| MSH6 | 0.0066097 | 0.7461832 | 0.00661 | ** | NA |
| ELTD1 | 0.0066592 | 0.7461832 | 0.00666 | ** | NA |
| ST7 | 0.0068047 | 0.7461832 | 0.00680 | ** | NA |
| IQCH | 0.0068191 | 0.7461832 | 0.00682 | ** | NA |
| LILRB2 | 0.0071039 | 0.7461832 | 0.00710 | ** | NA |
| BRD4 | 0.0072117 | 0.7461832 | 0.00721 | ** | NA |
| UTS2B | 0.0072533 | 0.7461832 | 0.00725 | ** | NA |
| KLHL1 | 0.0073975 | 0.7461832 | 0.00740 | ** | NA |
| RAD50 | 0.0075005 | 0.7461832 | 0.00750 | ** | NA |
| AMER2 | 0.0076915 | 0.7461832 | 0.00769 | ** | NA |
| DMWD | 0.0077231 | 0.7461832 | 0.00772 | ** | NA |
| ATG2A | 0.0077283 | 0.7461832 | 0.00773 | ** | NA |
| NPAT | 0.0080167 | 0.7461832 | 0.00802 | ** | NA |
| SMARCB1 | 0.0081997 | 0.7461832 | 0.00820 | ** | NA |
| PAQR3 | 0.0082276 | 0.7461832 | 0.00823 | ** | NA |
| SF3B2 | 0.0084940 | 0.7461832 | 0.00849 | ** | NA |
| NGRN | 0.0085338 | 0.7461832 | 0.00853 | ** | NA |
| SKOR1 | 0.0085338 | 0.7461832 | 0.00853 | ** | NA |
| PHLPP1 | 0.0085991 | 0.7461832 | 0.00860 | ** | NA |
| ZBTB25 | 0.0086381 | 0.7461832 | 0.00864 | ** | NA |
| ZNF124 | 0.0089578 | 0.7461832 | 0.00896 | ** | NA |
| TOP1MT | 0.0089680 | 0.7461832 | 0.00897 | ** | NA |
| THEMIS | 0.0092069 | 0.7461832 | 0.00921 | ** | NA |
| ESYT2 | 0.0092528 | 0.7461832 | 0.00925 | ** | NA |
| SRPR | 0.0092882 | 0.7461832 | 0.00929 | ** | NA |
| PAX5 | 0.0093106 | 0.7461832 | 0.00931 | ** | NA |
| TTC22 | 0.0094474 | 0.7461832 | 0.00945 | ** | NA |
| BUB1 | 0.0095144 | 0.7461832 | 0.00951 | ** | NA |
| RIMBP3 | 0.0096462 | 0.7461832 | 0.00965 | ** | NA |
# Overlaid histograms of the unadjusted and BH-adjusted p-values from the
# per-gene Wilcoxon tests, in bins of width 0.025 on [0, 1].
# The constant strings given to `fill` act as legend keys; scale_fill_manual()
# maps each key to the colour of the same name, and `breaks` pins which label
# belongs to which key instead of relying on alphabetical ordering.
wilcox_gene_plot <- ggplot(data = crispr_signif_del) +
  geom_histogram(aes(x = p, fill = "chartreuse4"), breaks = seq(0, 1, by = 0.025), color = "black", alpha = 0.7) +
  geom_histogram(aes(x = p.adj, fill = "darkslategray3"), breaks = seq(0, 1, by = 0.025), color = "black", alpha = 0.7) +
  scale_x_continuous(breaks = seq(0, 1, by = 0.05), labels = seq(0, 1, by = 0.05)) +
  scale_fill_manual(name = "P-values", values = c("chartreuse4" = "chartreuse4", "darkslategray3" = "darkslategray3"), breaks = c("chartreuse4", "darkslategray3"), labels = c("Unadjusted", "BH-adjusted")) +
  theme(axis.text.x = element_text(angle = 45, hjust = 1), legend.position = c(0.1, 0.85)) +
  # The axis carries both series, so it is not labelled as BH-adjusted only
  labs(x = "P-value (unadjusted and BH-adjusted)", y = "Frequency")
wilcox_gene_plot
# Per-gene Wilcoxon test of score by non-silent mutation status, with BH
# adjustment, followed by the significance-code columns from adj_signif()
crispr_signif_nonsilent <- compare_means(Score ~ Mutation_Status_Nonsilent, group.by = c("Hugo_Symbol"), data = crispr_data_ptmuts, method = "wilcox.test", p.adjust.method = "BH") %>%
  adj_signif()
# Sort by increasing unadjusted p-value
crispr_signif_nonsilent <- crispr_signif_nonsilent[order(crispr_signif_nonsilent$p), ]
# Cache the results on disk and read them back
saveRDS(crispr_signif_nonsilent, "./data_munging/rds/crispr_signif_ptmuts_nonsilent_gene.rds")
# write.table(crispr_signif_nonsilent, file = "~/Desktop/crispr_signif_ptmuts_nonsilent_gene.csv", quote = FALSE, sep = ",", row.names = FALSE)
crispr_signif_nonsilent <- readRDS("./data_munging/rds/crispr_signif_ptmuts_nonsilent_gene.rds")
# Scrollable table of the genes with unadjusted p < 0.01
knitr::kable(
  filter(crispr_signif_nonsilent, p < 0.01)[, c("Hugo_Symbol", "p", "p.adj", "p.format", "p.signif", "p.signif.adj")],
  caption = "Wilcoxon test results comparing non-silent mutant vs other cell lines, p < 0.01 (BH-adjusted p-values: * p <= 0.05, ** p <= 0.01, *** p <= 0.001, **** p <= 0.0001)"
) %>%
  kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive")) %>%
  scroll_box(width = "900px", height = "450px")
| Hugo_Symbol | p | p.adj | p.format | p.signif | p.signif.adj |
|---|---|---|---|---|---|
| TP53 | 0.0000000 | 0.0000000 | < 2e-16 | **** | **** |
| KRAS | 0.0000000 | 0.0000000 | < 2e-16 | **** | **** |
| NRAS | 0.0000000 | 0.0000000 | < 2e-16 | **** | **** |
| BRAF | 0.0000000 | 0.0000000 | < 2e-16 | **** | **** |
| PTEN | 0.0000000 | 0.0000000 | 2.1e-15 | **** | **** |
| PIK3CA | 0.0000000 | 0.0000000 | 9.6e-14 | **** | **** |
| CTNNB1 | 0.0000058 | 0.0143321 | 5.8e-06 | **** | * |
| TCERG1 | 0.0000135 | 0.0290351 | 1.4e-05 | **** | * |
| ARID1A | 0.0000332 | 0.0633052 | 3.3e-05 | **** | NA |
| FCGBP | 0.0000696 | 0.1195154 | 7.0e-05 | **** | NA |
| TPR | 0.0000827 | 0.1213965 | 8.3e-05 | **** | NA |
| PIK3R1 | 0.0000848 | 0.1213965 | 8.5e-05 | **** | NA |
| TAOK2 | 0.0002289 | 0.3025013 | 0.00023 | *** | NA |
| SLC22A9 | 0.0002503 | 0.3032257 | 0.00025 | *** | NA |
| GSTM5 | 0.0002648 | 0.3032257 | 0.00026 | *** | NA |
| CPSF1 | 0.0003055 | 0.3280113 | 0.00031 | *** | NA |
| NIPBL | 0.0003521 | 0.3306423 | 0.00035 | *** | NA |
| PIGW | 0.0003638 | 0.3306423 | 0.00036 | *** | NA |
| C14orf39 | 0.0003657 | 0.3306423 | 0.00037 | *** | NA |
| TLX2 | 0.0004787 | 0.4111424 | 0.00048 | *** | NA |
| ARFGAP1 | 0.0005157 | 0.4156420 | 0.00052 | *** | NA |
| ZNF808 | 0.0005323 | 0.4156420 | 0.00053 | *** | NA |
| CD320 | 0.0005816 | 0.4343665 | 0.00058 | *** | NA |
| MAT2B | 0.0007184 | 0.5086287 | 0.00072 | *** | NA |
| ZNF177 | 0.0007543 | 0.5086287 | 0.00075 | *** | NA |
| SYNPO2L | 0.0007699 | 0.5086287 | 0.00077 | *** | NA |
| HRAS | 0.0008408 | 0.5244580 | 0.00084 | *** | NA |
| UTP3 | 0.0008549 | 0.5244580 | 0.00085 | *** | NA |
| RNF208 | 0.0008907 | 0.5275698 | 0.00089 | *** | NA |
| KCNIP4 | 0.0009472 | 0.5423384 | 0.00095 | *** | NA |
| GOLGA3 | 0.0010340 | 0.5457674 | 0.00103 | ** | NA |
| DSEL | 0.0010395 | 0.5457674 | 0.00104 | ** | NA |
| PROM1 | 0.0010485 | 0.5457674 | 0.00105 | ** | NA |
| PMPCA | 0.0010990 | 0.5552227 | 0.00110 | ** | NA |
| OR52E8 | 0.0012095 | 0.5724468 | 0.00121 | ** | NA |
| OR13C4 | 0.0012702 | 0.5724468 | 0.00127 | ** | NA |
| TELO2 | 0.0012922 | 0.5724468 | 0.00129 | ** | NA |
| TAS1R2 | 0.0013163 | 0.5724468 | 0.00132 | ** | NA |
| ZNF439 | 0.0013227 | 0.5724468 | 0.00132 | ** | NA |
| MDN1 | 0.0013858 | 0.5724468 | 0.00139 | ** | NA |
| MTMR2 | 0.0014414 | 0.5724468 | 0.00144 | ** | NA |
| E2F1 | 0.0014486 | 0.5724468 | 0.00145 | ** | NA |
| LPO | 0.0014636 | 0.5724468 | 0.00146 | ** | NA |
| ING2 | 0.0014811 | 0.5724468 | 0.00148 | ** | NA |
| PLA2G4F | 0.0014997 | 0.5724468 | 0.00150 | ** | NA |
| MEF2C | 0.0015423 | 0.5738410 | 0.00154 | ** | NA |
| UTP20 | 0.0015804 | 0.5738410 | 0.00158 | ** | NA |
| IQCH | 0.0016077 | 0.5738410 | 0.00161 | ** | NA |
| PCDHB15 | 0.0016370 | 0.5738410 | 0.00164 | ** | NA |
| ACSM2B | 0.0017107 | 0.5804214 | 0.00171 | ** | NA |
| OR8D1 | 0.0017233 | 0.5804214 | 0.00172 | ** | NA |
| RFC1 | 0.0018888 | 0.6183553 | 0.00189 | ** | NA |
| KIAA1211L | 0.0019402 | 0.6183553 | 0.00194 | ** | NA |
| SENP8 | 0.0019559 | 0.6183553 | 0.00196 | ** | NA |
| RUVBL1 | 0.0019950 | 0.6183553 | 0.00200 | ** | NA |
| CLEC4C | 0.0020667 | 0.6183553 | 0.00207 | ** | NA |
| PCDHA8 | 0.0020705 | 0.6183553 | 0.00207 | ** | NA |
| PI4K2A | 0.0020961 | 0.6183553 | 0.00210 | ** | NA |
| KPNA6 | 0.0021334 | 0.6183553 | 0.00213 | ** | NA |
| POU2F1 | 0.0022102 | 0.6183553 | 0.00221 | ** | NA |
| BTBD11 | 0.0022318 | 0.6183553 | 0.00223 | ** | NA |
| FBXW7 | 0.0023109 | 0.6183553 | 0.00231 | ** | NA |
| ANKRD31 | 0.0023475 | 0.6183553 | 0.00235 | ** | NA |
| OR51T1 | 0.0023614 | 0.6183553 | 0.00236 | ** | NA |
| METTL17 | 0.0023923 | 0.6183553 | 0.00239 | ** | NA |
| CIITA | 0.0024057 | 0.6183553 | 0.00241 | ** | NA |
| ADAM15 | 0.0024806 | 0.6183553 | 0.00248 | ** | NA |
| TUBB2A | 0.0025373 | 0.6183553 | 0.00254 | ** | NA |
| RAB43 | 0.0025433 | 0.6183553 | 0.00254 | ** | NA |
| TAAR1 | 0.0025653 | 0.6183553 | 0.00257 | ** | NA |
| EZH2 | 0.0025733 | 0.6183553 | 0.00257 | ** | NA |
| ZDHHC24 | 0.0025919 | 0.6183553 | 0.00259 | ** | NA |
| SURF6 | 0.0027361 | 0.6245468 | 0.00274 | ** | NA |
| KIF11 | 0.0027512 | 0.6245468 | 0.00275 | ** | NA |
| VHL | 0.0027587 | 0.6245468 | 0.00276 | ** | NA |
| GRM7 | 0.0027641 | 0.6245468 | 0.00276 | ** | NA |
| AVEN | 0.0027997 | 0.6245468 | 0.00280 | ** | NA |
| OLFML2B | 0.0029959 | 0.6248726 | 0.00300 | ** | NA |
| SAMM50 | 0.0030102 | 0.6248726 | 0.00301 | ** | NA |
| ACTA1 | 0.0030465 | 0.6248726 | 0.00305 | ** | NA |
| FRMD4B | 0.0030803 | 0.6248726 | 0.00308 | ** | NA |
| SLC26A2 | 0.0030940 | 0.6248726 | 0.00309 | ** | NA |
| MYH13 | 0.0032183 | 0.6248726 | 0.00322 | ** | NA |
| LTC4S | 0.0032514 | 0.6248726 | 0.00325 | ** | NA |
| MAP2K3 | 0.0032831 | 0.6248726 | 0.00328 | ** | NA |
| DIS3L2 | 0.0032847 | 0.6248726 | 0.00328 | ** | NA |
| PTPRE | 0.0032929 | 0.6248726 | 0.00329 | ** | NA |
| CNOT1 | 0.0032940 | 0.6248726 | 0.00329 | ** | NA |
| ANKRD32 | 0.0034117 | 0.6248726 | 0.00341 | ** | NA |
| SCG2 | 0.0034839 | 0.6248726 | 0.00348 | ** | NA |
| TRIM8 | 0.0034895 | 0.6248726 | 0.00349 | ** | NA |
| CAMK2B | 0.0035496 | 0.6248726 | 0.00355 | ** | NA |
| ETAA1 | 0.0035617 | 0.6248726 | 0.00356 | ** | NA |
| ST6GALNAC1 | 0.0035708 | 0.6248726 | 0.00357 | ** | NA |
| LILRB2 | 0.0036287 | 0.6248726 | 0.00363 | ** | NA |
| SLC38A7 | 0.0036411 | 0.6248726 | 0.00364 | ** | NA |
| SOX9 | 0.0036519 | 0.6248726 | 0.00365 | ** | NA |
| MMRN2 | 0.0036540 | 0.6248726 | 0.00365 | ** | NA |
| MAML2 | 0.0037079 | 0.6248726 | 0.00371 | ** | NA |
| POLG | 0.0037209 | 0.6248726 | 0.00372 | ** | NA |
| SGCD | 0.0037645 | 0.6248726 | 0.00376 | ** | NA |
| KRT83 | 0.0038147 | 0.6248726 | 0.00381 | ** | NA |
| RB1 | 0.0038228 | 0.6248726 | 0.00382 | ** | NA |
| SPAG17 | 0.0038410 | 0.6248726 | 0.00384 | ** | NA |
| EEF2 | 0.0038473 | 0.6248726 | 0.00385 | ** | NA |
| SUSD5 | 0.0039261 | 0.6248726 | 0.00393 | ** | NA |
| ATR | 0.0039338 | 0.6248726 | 0.00393 | ** | NA |
| ATXN2L | 0.0041364 | 0.6248726 | 0.00414 | ** | NA |
| CD83 | 0.0041819 | 0.6248726 | 0.00418 | ** | NA |
| TOP1MT | 0.0042412 | 0.6248726 | 0.00424 | ** | NA |
| EMILIN3 | 0.0042659 | 0.6248726 | 0.00427 | ** | NA |
| SPHK2 | 0.0043100 | 0.6248726 | 0.00431 | ** | NA |
| PSMB4 | 0.0043587 | 0.6248726 | 0.00436 | ** | NA |
| ACHE | 0.0043818 | 0.6248726 | 0.00438 | ** | NA |
| ZNF135 | 0.0045686 | 0.6248726 | 0.00457 | ** | NA |
| TPP1 | 0.0046303 | 0.6248726 | 0.00463 | ** | NA |
| NFE2L1 | 0.0046446 | 0.6248726 | 0.00464 | ** | NA |
| KHSRP | 0.0046808 | 0.6248726 | 0.00468 | ** | NA |
| PPP1R1C | 0.0047398 | 0.6248726 | 0.00474 | ** | NA |
| CDC37 | 0.0047810 | 0.6248726 | 0.00478 | ** | NA |
| CD200R1 | 0.0048862 | 0.6248726 | 0.00489 | ** | NA |
| CWH43 | 0.0048913 | 0.6248726 | 0.00489 | ** | NA |
| FOXRED1 | 0.0049144 | 0.6248726 | 0.00491 | ** | NA |
| HCRTR1 | 0.0049160 | 0.6248726 | 0.00492 | ** | NA |
| ZC3H4 | 0.0050449 | 0.6248726 | 0.00504 | ** | NA |
| RBBP9 | 0.0050826 | 0.6248726 | 0.00508 | ** | NA |
| OGFOD3 | 0.0051338 | 0.6248726 | 0.00513 | ** | NA |
| TBC1D22A | 0.0052429 | 0.6248726 | 0.00524 | ** | NA |
| APOBEC1 | 0.0052779 | 0.6248726 | 0.00528 | ** | NA |
| OR56A3 | 0.0052916 | 0.6248726 | 0.00529 | ** | NA |
| SLC46A2 | 0.0053108 | 0.6248726 | 0.00531 | ** | NA |
| STK17B | 0.0053176 | 0.6248726 | 0.00532 | ** | NA |
| PIGP | 0.0053363 | 0.6248726 | 0.00534 | ** | NA |
| C6orf15 | 0.0053410 | 0.6248726 | 0.00534 | ** | NA |
| SMPD1 | 0.0053513 | 0.6248726 | 0.00535 | ** | NA |
| RNF43 | 0.0053669 | 0.6248726 | 0.00537 | ** | NA |
| FBXW12 | 0.0053691 | 0.6248726 | 0.00537 | ** | NA |
| RSPH6A | 0.0053903 | 0.6248726 | 0.00539 | ** | NA |
| THADA | 0.0055256 | 0.6248726 | 0.00553 | ** | NA |
| MYOC | 0.0055382 | 0.6248726 | 0.00554 | ** | NA |
| SLC12A4 | 0.0056227 | 0.6248726 | 0.00562 | ** | NA |
| BTAF1 | 0.0056982 | 0.6248726 | 0.00570 | ** | NA |
| DNAJC5B | 0.0057016 | 0.6248726 | 0.00570 | ** | NA |
| TAS2R60 | 0.0057034 | 0.6248726 | 0.00570 | ** | NA |
| IBA57 | 0.0057420 | 0.6248726 | 0.00574 | ** | NA |
| HSD3B7 | 0.0057475 | 0.6248726 | 0.00575 | ** | NA |
| HDGFRP2 | 0.0057596 | 0.6248726 | 0.00576 | ** | NA |
| MRPS34 | 0.0057928 | 0.6248726 | 0.00579 | ** | NA |
| SSH2 | 0.0058405 | 0.6248726 | 0.00584 | ** | NA |
| MBD3L2 | 0.0058844 | 0.6248726 | 0.00588 | ** | NA |
| COPS2 | 0.0059315 | 0.6248726 | 0.00593 | ** | NA |
| WDR75 | 0.0059969 | 0.6248726 | 0.00600 | ** | NA |
| FRYL | 0.0060235 | 0.6248726 | 0.00602 | ** | NA |
| MUS81 | 0.0061273 | 0.6248726 | 0.00613 | ** | NA |
| CD5L | 0.0061772 | 0.6248726 | 0.00618 | ** | NA |
| DPP7 | 0.0061926 | 0.6248726 | 0.00619 | ** | NA |
| LMX1A | 0.0062097 | 0.6248726 | 0.00621 | ** | NA |
| ANTXRL | 0.0062215 | 0.6248726 | 0.00622 | ** | NA |
| MMS22L | 0.0062232 | 0.6248726 | 0.00622 | ** | NA |
| SFXN3 | 0.0062354 | 0.6248726 | 0.00624 | ** | NA |
| USP32 | 0.0062588 | 0.6248726 | 0.00626 | ** | NA |
| FAM181B | 0.0062849 | 0.6248726 | 0.00628 | ** | NA |
| SMARCB1 | 0.0062993 | 0.6248726 | 0.00630 | ** | NA |
| ACBD6 | 0.0063036 | 0.6248726 | 0.00630 | ** | NA |
| SUPT7L | 0.0063204 | 0.6248726 | 0.00632 | ** | NA |
| CYP19A1 | 0.0063281 | 0.6248726 | 0.00633 | ** | NA |
| C12orf4 | 0.0063501 | 0.6248726 | 0.00635 | ** | NA |
| FBXL20 | 0.0064106 | 0.6248726 | 0.00641 | ** | NA |
| NXPH4 | 0.0064264 | 0.6248726 | 0.00643 | ** | NA |
| CCNB1 | 0.0065022 | 0.6248726 | 0.00650 | ** | NA |
| LRGUK | 0.0065445 | 0.6248726 | 0.00654 | ** | NA |
| OR4N2 | 0.0065455 | 0.6248726 | 0.00655 | ** | NA |
| RGS12 | 0.0065563 | 0.6248726 | 0.00656 | ** | NA |
| CEP76 | 0.0066070 | 0.6248726 | 0.00661 | ** | NA |
| MIIP | 0.0066145 | 0.6248726 | 0.00661 | ** | NA |
| FCN3 | 0.0066248 | 0.6248726 | 0.00662 | ** | NA |
| ZNF107 | 0.0066752 | 0.6248726 | 0.00668 | ** | NA |
| TACSTD2 | 0.0066861 | 0.6248726 | 0.00669 | ** | NA |
| GLG1 | 0.0066950 | 0.6248726 | 0.00670 | ** | NA |
| TSPAN13 | 0.0067540 | 0.6248726 | 0.00675 | ** | NA |
| VPS13D | 0.0067657 | 0.6248726 | 0.00677 | ** | NA |
| LPCAT4 | 0.0067758 | 0.6248726 | 0.00678 | ** | NA |
| ATXN2 | 0.0068586 | 0.6248726 | 0.00686 | ** | NA |
| DENND4C | 0.0068602 | 0.6248726 | 0.00686 | ** | NA |
| ARID1B | 0.0068718 | 0.6248726 | 0.00687 | ** | NA |
| KDM5B | 0.0068837 | 0.6248726 | 0.00688 | ** | NA |
| IFFO2 | 0.0068857 | 0.6248726 | 0.00689 | ** | NA |
| COG3 | 0.0069083 | 0.6248726 | 0.00691 | ** | NA |
| DHX35 | 0.0069214 | 0.6248726 | 0.00692 | ** | NA |
| PPM1A | 0.0069353 | 0.6248726 | 0.00694 | ** | NA |
| MSGN1 | 0.0069483 | 0.6248726 | 0.00695 | ** | NA |
| POC1A | 0.0071368 | 0.6341224 | 0.00714 | ** | NA |
| CLDN5 | 0.0071709 | 0.6341224 | 0.00717 | ** | NA |
| PRKCZ | 0.0072554 | 0.6341224 | 0.00726 | ** | NA |
| ACTL8 | 0.0072969 | 0.6341224 | 0.00730 | ** | NA |
| MGA | 0.0073204 | 0.6341224 | 0.00732 | ** | NA |
| GTF3C1 | 0.0073309 | 0.6341224 | 0.00733 | ** | NA |
| SMCR8 | 0.0073359 | 0.6341224 | 0.00734 | ** | NA |
| HSPB2 | 0.0073660 | 0.6341224 | 0.00737 | ** | NA |
| EML1 | 0.0073834 | 0.6341224 | 0.00738 | ** | NA |
| INTS12 | 0.0075473 | 0.6405098 | 0.00755 | ** | NA |
| NAA15 | 0.0075638 | 0.6405098 | 0.00756 | ** | NA |
| SPTBN4 | 0.0075987 | 0.6405098 | 0.00760 | ** | NA |
| ARSB | 0.0076069 | 0.6405098 | 0.00761 | ** | NA |
| VPS37B | 0.0076704 | 0.6427055 | 0.00767 | ** | NA |
| CREBBP | 0.0077132 | 0.6431501 | 0.00771 | ** | NA |
| NCR3LG1 | 0.0078580 | 0.6514880 | 0.00786 | ** | NA |
| ZNF493 | 0.0078890 | 0.6514880 | 0.00789 | ** | NA |
| AP5B1 | 0.0079487 | 0.6523618 | 0.00795 | ** | NA |
| ACYP2 | 0.0079841 | 0.6523618 | 0.00798 | ** | NA |
| C20orf26 | 0.0080463 | 0.6523618 | 0.00805 | ** | NA |
| PLOD1 | 0.0080515 | 0.6523618 | 0.00805 | ** | NA |
| OR4S2 | 0.0081035 | 0.6534954 | 0.00810 | ** | NA |
| SPO11 | 0.0081886 | 0.6568749 | 0.00819 | ** | NA |
| MCTP1 | 0.0082300 | 0.6568749 | 0.00823 | ** | NA |
| ELK3 | 0.0083459 | 0.6568749 | 0.00835 | ** | NA |
| H2AFY2 | 0.0083475 | 0.6568749 | 0.00835 | ** | NA |
| MRPL13 | 0.0083762 | 0.6568749 | 0.00838 | ** | NA |
| PAQR3 | 0.0084147 | 0.6568749 | 0.00841 | ** | NA |
| GHITM | 0.0084474 | 0.6568749 | 0.00845 | ** | NA |
| ADO | 0.0085378 | 0.6568749 | 0.00854 | ** | NA |
| MFAP4 | 0.0085654 | 0.6568749 | 0.00857 | ** | NA |
| CD163 | 0.0085663 | 0.6568749 | 0.00857 | ** | NA |
| ARMC7 | 0.0085988 | 0.6568749 | 0.00860 | ** | NA |
| COL8A1 | 0.0086043 | 0.6568749 | 0.00860 | ** | NA |
| NBPF15 | 0.0086920 | 0.6606339 | 0.00869 | ** | NA |
| OR13C3 | 0.0088398 | 0.6659977 | 0.00884 | ** | NA |
| COL7A1 | 0.0088402 | 0.6659977 | 0.00884 | ** | NA |
| C1orf141 | 0.0089498 | 0.6698168 | 0.00895 | ** | NA |
| ARHGEF17 | 0.0089894 | 0.6698168 | 0.00899 | ** | NA |
| COL9A2 | 0.0090078 | 0.6698168 | 0.00901 | ** | NA |
| DTHD1 | 0.0090748 | 0.6699801 | 0.00907 | ** | NA |
| LRP12 | 0.0090984 | 0.6699801 | 0.00910 | ** | NA |
| CCNL1 | 0.0091597 | 0.6699801 | 0.00916 | ** | NA |
| TMEM258 | 0.0091769 | 0.6699801 | 0.00918 | ** | NA |
| DNAJC8 | 0.0093058 | 0.6699801 | 0.00931 | ** | NA |
| KRTAP4-4 | 0.0093494 | 0.6699801 | 0.00935 | ** | NA |
| LCTL | 0.0093837 | 0.6699801 | 0.00938 | ** | NA |
| ELMSAN1 | 0.0093954 | 0.6699801 | 0.00940 | ** | NA |
| SGK2 | 0.0093988 | 0.6699801 | 0.00940 | ** | NA |
| SLC25A4 | 0.0094344 | 0.6699801 | 0.00943 | ** | NA |
| F11R | 0.0094708 | 0.6699801 | 0.00947 | ** | NA |
| LY6G6F | 0.0095144 | 0.6699801 | 0.00951 | ** | NA |
| ZNF330 | 0.0095171 | 0.6699801 | 0.00952 | ** | NA |
| NECAB1 | 0.0095689 | 0.6708810 | 0.00957 | ** | NA |
| GMCL1 | 0.0096195 | 0.6710890 | 0.00962 | ** | NA |
| C16orf58 | 0.0096729 | 0.6710890 | 0.00967 | ** | NA |
| ZBTB44 | 0.0096904 | 0.6710890 | 0.00969 | ** | NA |
| INHBA | 0.0097282 | 0.6710890 | 0.00973 | ** | NA |
| RNF31 | 0.0098119 | 0.6727895 | 0.00981 | ** | NA |
| CACNA1I | 0.0098312 | 0.6727895 | 0.00983 | ** | NA |
| COPS3 | 0.0098996 | 0.6747558 | 0.00990 | ** | NA |
| NOL6 | 0.0099717 | 0.6747558 | 0.00997 | ** | NA |
| WDR77 | 0.0099778 | 0.6747558 | 0.00998 | ** | NA |
# Overlaid histograms of the unadjusted (p) and BH-adjusted (p.adj) p-values
# stored in crispr_signif_nonsilent, using bins of width 0.025 over [0, 1].
# The constant strings passed to `fill` are only legend keys: scale_fill_manual
# maps each key to the colour of the same name and to a readable label.
wilcox_gene_nonsilent_plot <- ggplot(data = crispr_signif_nonsilent) +
  geom_histogram(aes(x = p, fill = "chartreuse4"), breaks = seq(0, 1, by = 0.025), color = "black", alpha = 0.7) +
  geom_histogram(aes(x = p.adj, fill = "darkslategray3"), breaks = seq(0, 1, by = 0.025), color = "black", alpha = 0.7) +
  scale_x_continuous(breaks = seq(0, 1, by = 0.05), labels = seq(0, 1, by = 0.05)) +
  scale_fill_manual(
    name = "P-values",
    values = c("chartreuse4" = "chartreuse4", "darkslategray3" = "darkslategray3"),
    # Pin the key order so each label is tied to its key explicitly instead of
    # relying on the default alphabetical ordering of the fill values.
    breaks = c("chartreuse4", "darkslategray3"),
    labels = c("Unadjusted", "BH-adjusted")
  ) +
  theme(axis.text.x = element_text(angle = 45, hjust = 1), legend.position = c(0.1, 0.85)) +
  # The x axis carries both the unadjusted and the adjusted distribution, so it
  # is labelled generically; the legend distinguishes the two.
  labs(x = "P-value", y = "Frequency")
wilcox_gene_nonsilent_plot
# Per-gene Wilcoxon test of Score against Mutation_Status_DeleteriousMissense
# (one test per Hugo_Symbol, p-values BH-adjusted by compare_means), followed by
# adj_signif(), a helper defined elsewhere in this file (presumably the source
# of the p.signif.adj column shown below -- confirm against its definition).
crispr_signif_delmis <- compare_means(
  Score ~ Mutation_Status_DeleteriousMissense,
  data = crispr_data_ptmuts,
  method = "wilcox.test",
  group.by = "Hugo_Symbol",
  p.adjust.method = "BH"
) %>%
  adj_signif()
# Sort rows by ascending unadjusted p-value.
crispr_signif_delmis <- crispr_signif_delmis[order(crispr_signif_delmis[["p"]]), ]
# Cache the sorted results on disk.
saveRDS(crispr_signif_delmis, file = "./data_munging/rds/crispr_signif_ptmuts_deleteriousmissense_gene.rds")
# write.table(crispr_signif_delmis, file = "~/Desktop/crispr_signif_ptmuts_deleteriousmissense_gene.csv", quote = FALSE, sep = ",", row.names = FALSE)
# Reload the cached results.
crispr_signif_delmis <- readRDS(file = "./data_munging/rds/crispr_signif_ptmuts_deleteriousmissense_gene.rds")
# Render the rows with unadjusted p < 0.01 as a styled, scrollable table.
crispr_signif_delmis %>%
  filter(p < 0.01) %>%
  .[, c("Hugo_Symbol", "p", "p.adj", "p.format", "p.signif", "p.signif.adj")] %>%
  knitr::kable(caption = "Wilcoxon test results comparing deleterious and missense mutant vs other cell lines, p < 0.01 (BH-adjusted p-values: * p <= 0.05, ** p <= 0.01, *** p <= 0.001, **** p <= 0.0001)") %>%
  kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive")) %>%
  scroll_box(width = "900px", height = "450px")
| Hugo_Symbol | p | p.adj | p.format | p.signif | p.signif.adj |
|---|---|---|---|---|---|
| KRAS | 0.0000000 | 0.0000000 | < 2e-16 | **** | **** |
| TP53 | 0.0000000 | 0.0000000 | < 2e-16 | **** | **** |
| NRAS | 0.0000000 | 0.0000000 | < 2e-16 | **** | **** |
| BRAF | 0.0000000 | 0.0000000 | < 2e-16 | **** | **** |
| PTEN | 0.0000000 | 0.0000000 | 2.1e-15 | **** | **** |
| PIK3CA | 0.0000000 | 0.0000000 | 9.6e-14 | **** | **** |
| CTNNB1 | 0.0000058 | 0.0143296 | 5.8e-06 | **** | * |
| TCERG1 | 0.0000135 | 0.0290301 | 1.4e-05 | **** | * |
| ARID1A | 0.0000332 | 0.0632941 | 3.3e-05 | **** | NA |
| FCGBP | 0.0000696 | 0.1194946 | 7.0e-05 | **** | NA |
| TPR | 0.0000827 | 0.1291821 | 8.3e-05 | **** | NA |
| SLC22A9 | 0.0002503 | 0.3498147 | 0.00025 | *** | NA |
| GSTM5 | 0.0002648 | 0.3498147 | 0.00026 | *** | NA |
| CPSF1 | 0.0003055 | 0.3748046 | 0.00031 | *** | NA |
| PIGW | 0.0003638 | 0.3925692 | 0.00036 | *** | NA |
| C14orf39 | 0.0003657 | 0.3925692 | 0.00037 | *** | NA |
| TLX2 | 0.0004787 | 0.4540312 | 0.00048 | *** | NA |
| NIPBL | 0.0005034 | 0.4540312 | 0.00050 | *** | NA |
| ARFGAP1 | 0.0005157 | 0.4540312 | 0.00052 | *** | NA |
| ZNF808 | 0.0005323 | 0.4540312 | 0.00053 | *** | NA |
| TAOK2 | 0.0005756 | 0.4540312 | 0.00058 | *** | NA |
| CD320 | 0.0005816 | 0.4540312 | 0.00058 | *** | NA |
| MAT2B | 0.0007184 | 0.5125383 | 0.00072 | *** | NA |
| ZNF177 | 0.0007543 | 0.5125383 | 0.00075 | *** | NA |
| SYNPO2L | 0.0007699 | 0.5125383 | 0.00077 | *** | NA |
| HRAS | 0.0008408 | 0.5125383 | 0.00084 | *** | NA |
| UTP3 | 0.0008549 | 0.5125383 | 0.00085 | *** | NA |
| PIK3R1 | 0.0008568 | 0.5125383 | 0.00086 | *** | NA |
| RNF208 | 0.0008907 | 0.5125383 | 0.00089 | *** | NA |
| FBXW7 | 0.0008953 | 0.5125383 | 0.00090 | *** | NA |
| KCNIP4 | 0.0009472 | 0.5247519 | 0.00095 | *** | NA |
| GOLGA3 | 0.0010340 | 0.5296229 | 0.00103 | ** | NA |
| DSEL | 0.0010395 | 0.5296229 | 0.00104 | ** | NA |
| PROM1 | 0.0010485 | 0.5296229 | 0.00105 | ** | NA |
| PMPCA | 0.0010990 | 0.5392650 | 0.00110 | ** | NA |
| OR52E8 | 0.0012095 | 0.5479916 | 0.00121 | ** | NA |
| OR13C4 | 0.0012702 | 0.5479916 | 0.00127 | ** | NA |
| TELO2 | 0.0012922 | 0.5479916 | 0.00129 | ** | NA |
| TAS1R2 | 0.0013163 | 0.5479916 | 0.00132 | ** | NA |
| ZNF439 | 0.0013227 | 0.5479916 | 0.00132 | ** | NA |
| MDN1 | 0.0013858 | 0.5479916 | 0.00139 | ** | NA |
| MTMR2 | 0.0014414 | 0.5479916 | 0.00144 | ** | NA |
| E2F1 | 0.0014486 | 0.5479916 | 0.00145 | ** | NA |
| LPO | 0.0014636 | 0.5479916 | 0.00146 | ** | NA |
| ING2 | 0.0014811 | 0.5479916 | 0.00148 | ** | NA |
| VHL | 0.0014920 | 0.5479916 | 0.00149 | ** | NA |
| PLA2G4F | 0.0014997 | 0.5479916 | 0.00150 | ** | NA |
| MEF2C | 0.0015423 | 0.5512411 | 0.00154 | ** | NA |
| UTP20 | 0.0015804 | 0.5512411 | 0.00158 | ** | NA |
| IQCH | 0.0016077 | 0.5512411 | 0.00161 | ** | NA |
| PCDHB15 | 0.0016370 | 0.5512411 | 0.00164 | ** | NA |
| ACSM2B | 0.0017107 | 0.5584212 | 0.00171 | ** | NA |
| OR8D1 | 0.0017233 | 0.5584212 | 0.00172 | ** | NA |
| RFC1 | 0.0018888 | 0.5989004 | 0.00189 | ** | NA |
| KIAA1211L | 0.0019402 | 0.5989004 | 0.00194 | ** | NA |
| SENP8 | 0.0019559 | 0.5989004 | 0.00196 | ** | NA |
| RUVBL1 | 0.0019950 | 0.5989004 | 0.00200 | ** | NA |
| CLEC4C | 0.0020667 | 0.5989004 | 0.00207 | ** | NA |
| PCDHA8 | 0.0020705 | 0.5989004 | 0.00207 | ** | NA |
| PI4K2A | 0.0020961 | 0.5989004 | 0.00210 | ** | NA |
| KPNA6 | 0.0021334 | 0.5989004 | 0.00213 | ** | NA |
| POU2F1 | 0.0022102 | 0.5989004 | 0.00221 | ** | NA |
| EZH2 | 0.0022226 | 0.5989004 | 0.00222 | ** | NA |
| BTBD11 | 0.0022318 | 0.5989004 | 0.00223 | ** | NA |
| ANKRD31 | 0.0023475 | 0.6075732 | 0.00235 | ** | NA |
| OR51T1 | 0.0023614 | 0.6075732 | 0.00236 | ** | NA |
| METTL17 | 0.0023923 | 0.6075732 | 0.00239 | ** | NA |
| CIITA | 0.0024057 | 0.6075732 | 0.00241 | ** | NA |
| ADAM15 | 0.0024806 | 0.6174244 | 0.00248 | ** | NA |
| TUBB2A | 0.0025373 | 0.6182473 | 0.00254 | ** | NA |
| TAAR1 | 0.0025653 | 0.6182473 | 0.00257 | ** | NA |
| ZDHHC24 | 0.0025919 | 0.6182473 | 0.00259 | ** | NA |
| SURF6 | 0.0027361 | 0.6329390 | 0.00274 | ** | NA |
| KIF11 | 0.0027512 | 0.6329390 | 0.00275 | ** | NA |
| GRM7 | 0.0027641 | 0.6329390 | 0.00276 | ** | NA |
| OLFML2B | 0.0029959 | 0.6347331 | 0.00300 | ** | NA |
| SAMM50 | 0.0030102 | 0.6347331 | 0.00301 | ** | NA |
| ACTA1 | 0.0030465 | 0.6347331 | 0.00305 | ** | NA |
| FRMD4B | 0.0030803 | 0.6347331 | 0.00308 | ** | NA |
| SLC26A2 | 0.0030940 | 0.6347331 | 0.00309 | ** | NA |
| MYH13 | 0.0032183 | 0.6347331 | 0.00322 | ** | NA |
| LTC4S | 0.0032514 | 0.6347331 | 0.00325 | ** | NA |
| MAP2K3 | 0.0032831 | 0.6347331 | 0.00328 | ** | NA |
| DIS3L2 | 0.0032847 | 0.6347331 | 0.00328 | ** | NA |
| PTPRE | 0.0032929 | 0.6347331 | 0.00329 | ** | NA |
| CNOT1 | 0.0032940 | 0.6347331 | 0.00329 | ** | NA |
| ANKRD32 | 0.0034117 | 0.6347331 | 0.00341 | ** | NA |
| SCG2 | 0.0034839 | 0.6347331 | 0.00348 | ** | NA |
| TRIM8 | 0.0034895 | 0.6347331 | 0.00349 | ** | NA |
| CAMK2B | 0.0035496 | 0.6347331 | 0.00355 | ** | NA |
| ETAA1 | 0.0035617 | 0.6347331 | 0.00356 | ** | NA |
| ST6GALNAC1 | 0.0035708 | 0.6347331 | 0.00357 | ** | NA |
| SLC38A7 | 0.0036411 | 0.6347331 | 0.00364 | ** | NA |
| SOX9 | 0.0036519 | 0.6347331 | 0.00365 | ** | NA |
| MMRN2 | 0.0036540 | 0.6347331 | 0.00365 | ** | NA |
| SGCD | 0.0037645 | 0.6347331 | 0.00376 | ** | NA |
| KRT83 | 0.0038147 | 0.6347331 | 0.00381 | ** | NA |
| RB1 | 0.0038228 | 0.6347331 | 0.00382 | ** | NA |
| SPAG17 | 0.0038410 | 0.6347331 | 0.00384 | ** | NA |
| EEF2 | 0.0038473 | 0.6347331 | 0.00385 | ** | NA |
| SUSD5 | 0.0039261 | 0.6347331 | 0.00393 | ** | NA |
| NOL7 | 0.0039331 | 0.6347331 | 0.00393 | ** | NA |
| ATR | 0.0039338 | 0.6347331 | 0.00393 | ** | NA |
| AVEN | 0.0041287 | 0.6347331 | 0.00413 | ** | NA |
| ATXN2L | 0.0041364 | 0.6347331 | 0.00414 | ** | NA |
| CD83 | 0.0041819 | 0.6347331 | 0.00418 | ** | NA |
| TOP1MT | 0.0042412 | 0.6347331 | 0.00424 | ** | NA |
| EMILIN3 | 0.0042659 | 0.6347331 | 0.00427 | ** | NA |
| SPHK2 | 0.0043100 | 0.6347331 | 0.00431 | ** | NA |
| PSMB4 | 0.0043587 | 0.6347331 | 0.00436 | ** | NA |
| ACHE | 0.0043818 | 0.6347331 | 0.00438 | ** | NA |
| IQGAP3 | 0.0045407 | 0.6347331 | 0.00454 | ** | NA |
| ZNF135 | 0.0045686 | 0.6347331 | 0.00457 | ** | NA |
| TPP1 | 0.0046303 | 0.6347331 | 0.00463 | ** | NA |
| NFE2L1 | 0.0046446 | 0.6347331 | 0.00464 | ** | NA |
| KHSRP | 0.0046808 | 0.6347331 | 0.00468 | ** | NA |
| PPP1R1C | 0.0047398 | 0.6347331 | 0.00474 | ** | NA |
| CDC37 | 0.0047810 | 0.6347331 | 0.00478 | ** | NA |
| CD200R1 | 0.0048862 | 0.6347331 | 0.00489 | ** | NA |
| CWH43 | 0.0048913 | 0.6347331 | 0.00489 | ** | NA |
| FOXRED1 | 0.0049144 | 0.6347331 | 0.00491 | ** | NA |
| HCRTR1 | 0.0049160 | 0.6347331 | 0.00492 | ** | NA |
| ZC3H4 | 0.0050449 | 0.6347331 | 0.00504 | ** | NA |
| RBBP9 | 0.0050826 | 0.6347331 | 0.00508 | ** | NA |
| OGFOD3 | 0.0051338 | 0.6347331 | 0.00513 | ** | NA |
| TBC1D22A | 0.0052429 | 0.6347331 | 0.00524 | ** | NA |
| APOBEC1 | 0.0052779 | 0.6347331 | 0.00528 | ** | NA |
| OR56A3 | 0.0052916 | 0.6347331 | 0.00529 | ** | NA |
| SLC46A2 | 0.0053108 | 0.6347331 | 0.00531 | ** | NA |
| PIGP | 0.0053363 | 0.6347331 | 0.00534 | ** | NA |
| C6orf15 | 0.0053410 | 0.6347331 | 0.00534 | ** | NA |
| SMPD1 | 0.0053513 | 0.6347331 | 0.00535 | ** | NA |
| RNF43 | 0.0053669 | 0.6347331 | 0.00537 | ** | NA |
| FBXW12 | 0.0053691 | 0.6347331 | 0.00537 | ** | NA |
| RSPH6A | 0.0053903 | 0.6347331 | 0.00539 | ** | NA |
| THADA | 0.0055256 | 0.6347331 | 0.00553 | ** | NA |
| MYOC | 0.0055382 | 0.6347331 | 0.00554 | ** | NA |
| SLC12A4 | 0.0056227 | 0.6347331 | 0.00562 | ** | NA |
| BTAF1 | 0.0056982 | 0.6347331 | 0.00570 | ** | NA |
| DNAJC5B | 0.0057016 | 0.6347331 | 0.00570 | ** | NA |
| TAS2R60 | 0.0057034 | 0.6347331 | 0.00570 | ** | NA |
| IBA57 | 0.0057420 | 0.6347331 | 0.00574 | ** | NA |
| HSD3B7 | 0.0057475 | 0.6347331 | 0.00575 | ** | NA |
| HDGFRP2 | 0.0057596 | 0.6347331 | 0.00576 | ** | NA |
| MRPS34 | 0.0057928 | 0.6347331 | 0.00579 | ** | NA |
| SSH2 | 0.0058405 | 0.6347331 | 0.00584 | ** | NA |
| MBD3L2 | 0.0058844 | 0.6347331 | 0.00588 | ** | NA |
| COPS2 | 0.0059315 | 0.6347331 | 0.00593 | ** | NA |
| WDR75 | 0.0059969 | 0.6347331 | 0.00600 | ** | NA |
| FRYL | 0.0060235 | 0.6347331 | 0.00602 | ** | NA |
| ATXN2 | 0.0060319 | 0.6347331 | 0.00603 | ** | NA |
| NBPF15 | 0.0060598 | 0.6347331 | 0.00606 | ** | NA |
| MUS81 | 0.0061273 | 0.6347331 | 0.00613 | ** | NA |
| CD5L | 0.0061772 | 0.6347331 | 0.00618 | ** | NA |
| DPP7 | 0.0061926 | 0.6347331 | 0.00619 | ** | NA |
| LMX1A | 0.0062097 | 0.6347331 | 0.00621 | ** | NA |
| ANTXRL | 0.0062215 | 0.6347331 | 0.00622 | ** | NA |
| MMS22L | 0.0062232 | 0.6347331 | 0.00622 | ** | NA |
| SFXN3 | 0.0062354 | 0.6347331 | 0.00624 | ** | NA |
| USP32 | 0.0062588 | 0.6347331 | 0.00626 | ** | NA |
| RAB43 | 0.0062666 | 0.6347331 | 0.00627 | ** | NA |
| FAM181B | 0.0062849 | 0.6347331 | 0.00628 | ** | NA |
| SMARCB1 | 0.0062993 | 0.6347331 | 0.00630 | ** | NA |
| ACBD6 | 0.0063036 | 0.6347331 | 0.00630 | ** | NA |
| SUPT7L | 0.0063204 | 0.6347331 | 0.00632 | ** | NA |
| CYP19A1 | 0.0063281 | 0.6347331 | 0.00633 | ** | NA |
| C12orf4 | 0.0063501 | 0.6347331 | 0.00635 | ** | NA |
| FBXL20 | 0.0064106 | 0.6347331 | 0.00641 | ** | NA |
| NXPH4 | 0.0064264 | 0.6347331 | 0.00643 | ** | NA |
| CCNB1 | 0.0065022 | 0.6347331 | 0.00650 | ** | NA |
| OR4N2 | 0.0065455 | 0.6347331 | 0.00655 | ** | NA |
| RGS12 | 0.0065563 | 0.6347331 | 0.00656 | ** | NA |
| CEP76 | 0.0066070 | 0.6347331 | 0.00661 | ** | NA |
| MIIP | 0.0066145 | 0.6347331 | 0.00661 | ** | NA |
| FCN3 | 0.0066248 | 0.6347331 | 0.00662 | ** | NA |
| ZNF107 | 0.0066752 | 0.6347331 | 0.00668 | ** | NA |
| TACSTD2 | 0.0066861 | 0.6347331 | 0.00669 | ** | NA |
| GLG1 | 0.0066950 | 0.6347331 | 0.00670 | ** | NA |
| TSPAN13 | 0.0067540 | 0.6347331 | 0.00675 | ** | NA |
| VPS13D | 0.0067657 | 0.6347331 | 0.00677 | ** | NA |
| LPCAT4 | 0.0067758 | 0.6347331 | 0.00678 | ** | NA |
| DENND4C | 0.0068602 | 0.6347331 | 0.00686 | ** | NA |
| ARID1B | 0.0068718 | 0.6347331 | 0.00687 | ** | NA |
| KDM5B | 0.0068837 | 0.6347331 | 0.00688 | ** | NA |
| IFFO2 | 0.0068857 | 0.6347331 | 0.00689 | ** | NA |
| COG3 | 0.0069083 | 0.6347331 | 0.00691 | ** | NA |
| DHX35 | 0.0069214 | 0.6347331 | 0.00692 | ** | NA |
| MSGN1 | 0.0069483 | 0.6347331 | 0.00695 | ** | NA |
| CLDN5 | 0.0071709 | 0.6515991 | 0.00717 | ** | NA |
| PRKCZ | 0.0072554 | 0.6527818 | 0.00726 | ** | NA |
| ACTL8 | 0.0072969 | 0.6527818 | 0.00730 | ** | NA |
| MGA | 0.0073204 | 0.6527818 | 0.00732 | ** | NA |
| SMCR8 | 0.0073359 | 0.6527818 | 0.00734 | ** | NA |
| EML1 | 0.0073834 | 0.6536203 | 0.00738 | ** | NA |
| F11R | 0.0075268 | 0.6564883 | 0.00753 | ** | NA |
| INTS12 | 0.0075473 | 0.6564883 | 0.00755 | ** | NA |
| NAA15 | 0.0075638 | 0.6564883 | 0.00756 | ** | NA |
| SPTBN4 | 0.0075987 | 0.6564883 | 0.00760 | ** | NA |
| ARSB | 0.0076069 | 0.6564883 | 0.00761 | ** | NA |
| VPS37B | 0.0076704 | 0.6586581 | 0.00767 | ** | NA |
| CREBBP | 0.0077132 | 0.6590337 | 0.00771 | ** | NA |
| NCR3LG1 | 0.0078580 | 0.6680868 | 0.00786 | ** | NA |
| AP5B1 | 0.0079487 | 0.6712453 | 0.00795 | ** | NA |
| ACYP2 | 0.0079841 | 0.6712453 | 0.00798 | ** | NA |
| C20orf26 | 0.0080463 | 0.6712453 | 0.00805 | ** | NA |
| PLOD1 | 0.0080515 | 0.6712453 | 0.00805 | ** | NA |
| OR4S2 | 0.0081035 | 0.6723199 | 0.00810 | ** | NA |
| SPO11 | 0.0081886 | 0.6747536 | 0.00819 | ** | NA |
| MCTP1 | 0.0082300 | 0.6747536 | 0.00823 | ** | NA |
| ELK3 | 0.0083459 | 0.6747536 | 0.00835 | ** | NA |
| H2AFY2 | 0.0083475 | 0.6747536 | 0.00835 | ** | NA |
| MRPL13 | 0.0083762 | 0.6747536 | 0.00838 | ** | NA |
| PAQR3 | 0.0084147 | 0.6747536 | 0.00841 | ** | NA |
| GHITM | 0.0084474 | 0.6747536 | 0.00845 | ** | NA |
| ADO | 0.0085378 | 0.6747536 | 0.00854 | ** | NA |
| MFAP4 | 0.0085654 | 0.6747536 | 0.00857 | ** | NA |
| CD163 | 0.0085663 | 0.6747536 | 0.00857 | ** | NA |
| ARMC7 | 0.0085988 | 0.6747536 | 0.00860 | ** | NA |
| COL8A1 | 0.0086043 | 0.6747536 | 0.00860 | ** | NA |
| LILRB2 | 0.0088241 | 0.6822660 | 0.00882 | ** | NA |
| OR13C3 | 0.0088398 | 0.6822660 | 0.00884 | ** | NA |
| COL7A1 | 0.0088402 | 0.6822660 | 0.00884 | ** | NA |
| C1orf141 | 0.0089498 | 0.6822660 | 0.00895 | ** | NA |
| ARHGEF17 | 0.0089894 | 0.6822660 | 0.00899 | ** | NA |
| COL9A2 | 0.0090078 | 0.6822660 | 0.00901 | ** | NA |
| DTHD1 | 0.0090748 | 0.6822660 | 0.00907 | ** | NA |
| H3F3A | 0.0090755 | 0.6822660 | 0.00908 | ** | NA |
| LRP12 | 0.0090984 | 0.6822660 | 0.00910 | ** | NA |
| CCNL1 | 0.0091597 | 0.6822660 | 0.00916 | ** | NA |
| MAML2 | 0.0091672 | 0.6822660 | 0.00917 | ** | NA |
| TMEM258 | 0.0091769 | 0.6822660 | 0.00918 | ** | NA |
| DNAJC8 | 0.0093058 | 0.6835655 | 0.00931 | ** | NA |
| KRTAP4-4 | 0.0093494 | 0.6835655 | 0.00935 | ** | NA |
| LCTL | 0.0093837 | 0.6835655 | 0.00938 | ** | NA |
| SGK2 | 0.0093988 | 0.6835655 | 0.00940 | ** | NA |
| SLC25A4 | 0.0094344 | 0.6835655 | 0.00943 | ** | NA |
| LY6G6F | 0.0095144 | 0.6835655 | 0.00951 | ** | NA |
| ZNF330 | 0.0095171 | 0.6835655 | 0.00952 | ** | NA |
| NECAB1 | 0.0095689 | 0.6835655 | 0.00957 | ** | NA |
| PPM1A | 0.0095782 | 0.6835655 | 0.00958 | ** | NA |
| GMCL1 | 0.0096195 | 0.6835655 | 0.00962 | ** | NA |
| C16orf58 | 0.0096729 | 0.6835655 | 0.00967 | ** | NA |
| ZBTB44 | 0.0096904 | 0.6835655 | 0.00969 | ** | NA |
| INHBA | 0.0097282 | 0.6835655 | 0.00973 | ** | NA |
| GTF3C1 | 0.0097721 | 0.6835655 | 0.00977 | ** | NA |
| RNF31 | 0.0098119 | 0.6835655 | 0.00981 | ** | NA |
| CACNA1I | 0.0098312 | 0.6835655 | 0.00983 | ** | NA |
| COPS3 | 0.0098996 | 0.6854321 | 0.00990 | ** | NA |
| NOL6 | 0.0099717 | 0.6854321 | 0.00997 | ** | NA |
| WDR77 | 0.0099778 | 0.6854321 | 0.00998 | ** | NA |
# Overlaid histograms of the unadjusted (p) and BH-adjusted (p.adj) p-values
# stored in crispr_signif_delmis, using bins of width 0.025 over [0, 1].
# The constant strings passed to `fill` are only legend keys: scale_fill_manual
# maps each key to the colour of the same name and to a readable label.
wilcox_gene_delmis_plot <- ggplot(data = crispr_signif_delmis) +
  geom_histogram(aes(x = p, fill = "chartreuse4"), breaks = seq(0, 1, by = 0.025), color = "black", alpha = 0.7) +
  geom_histogram(aes(x = p.adj, fill = "darkslategray3"), breaks = seq(0, 1, by = 0.025), color = "black", alpha = 0.7) +
  scale_x_continuous(breaks = seq(0, 1, by = 0.05), labels = seq(0, 1, by = 0.05)) +
  scale_fill_manual(
    name = "P-values",
    values = c("chartreuse4" = "chartreuse4", "darkslategray3" = "darkslategray3"),
    # Pin the key order so each label is tied to its key explicitly instead of
    # relying on the default alphabetical ordering of the fill values.
    breaks = c("chartreuse4", "darkslategray3"),
    labels = c("Unadjusted", "BH-adjusted")
  ) +
  theme(axis.text.x = element_text(angle = 45, hjust = 1), legend.position = c(0.1, 0.85)) +
  # The x axis carries both the unadjusted and the adjusted distribution, so it
  # is labelled generically; the legend distinguishes the two.
  labs(x = "P-value", y = "Frequency")
wilcox_gene_delmis_plot
# Wilcoxon test of Score against Mutation_Status_Deleterious, run separately for
# each (Hugo_Symbol, group_general_lineage_name) pair with BH-adjusted p-values,
# followed by adj_signif(), a helper defined elsewhere in this file (presumably
# the source of the p.signif.adj column shown below -- confirm against its
# definition).
crispr_signif_del_lineage <- compare_means(
  Score ~ Mutation_Status_Deleterious,
  data = crispr_data_ptmuts,
  method = "wilcox.test",
  group.by = c("Hugo_Symbol", "group_general_lineage_name"),
  p.adjust.method = "BH"
) %>%
  adj_signif()
# Sort rows by ascending unadjusted p-value.
crispr_signif_del_lineage <- crispr_signif_del_lineage[order(crispr_signif_del_lineage[["p"]]), ]
# Cache the sorted results on disk.
saveRDS(crispr_signif_del_lineage, file = "./data_munging/rds/crispr_signif_ptmuts_deleterious_lineage.rds")
# write.table(crispr_signif_del_lineage, file = "~/Desktop/crispr_signif_ptmuts_deleterious_lineage.csv", quote = FALSE, sep = ",", row.names = FALSE)
# Reload the cached results.
crispr_signif_del_lineage <- readRDS(file = "./data_munging/rds/crispr_signif_ptmuts_deleterious_lineage.rds")
# Render the rows with unadjusted p < 0.01 as a styled, scrollable table.
crispr_signif_del_lineage %>%
  filter(p < 0.01) %>%
  .[, c("Hugo_Symbol", "group_general_lineage_name", "p", "p.adj", "p.format", "p.signif", "p.signif.adj")] %>%
  knitr::kable(caption = "Wilcoxon test results comparing deleterious mutant vs other cell lines by lineage, p < 0.01 (BH-adjusted p-values: * p <= 0.05, ** p <= 0.01, *** p <= 0.001, **** p <= 0.0001)") %>%
  kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive")) %>%
  scroll_box(width = "900px", height = "450px")
| Hugo_Symbol | group_general_lineage_name | p | p.adj | p.format | p.signif | p.signif.adj |
|---|---|---|---|---|---|---|
| PTEN | central nervous system cancer | 0.0006658 | 0.8370371 | 0.00067 | *** | NA |
| TP53 | lung cancer | 0.0007958 | 0.8370371 | 0.00080 | *** | NA |
| DCAF8 | lung cancer | 0.0033009 | 0.8370371 | 0.00330 | ** | NA |
| PTEN | ovarian cancer | 0.0043470 | 0.8370371 | 0.00435 | ** | NA |
| SETD2 | kidney cancer | 0.0056647 | 0.8370371 | 0.00566 | ** | NA |
| DVL2 | uterine cancer | 0.0067676 | 0.8370371 | 0.00677 | ** | NA |
| ARID1A | pancreatic cancer | 0.0076726 | 0.8370371 | 0.00767 | ** | NA |
| MCPH1 | lung cancer | 0.0077467 | 0.8370371 | 0.00775 | ** | NA |
# Overlaid histograms of the unadjusted (p) and BH-adjusted (p.adj) p-values
# stored in crispr_signif_del_lineage, using bins of width 0.025 over [0, 1].
# The constant strings passed to `fill` are only legend keys: scale_fill_manual
# maps each key to the colour of the same name and to a readable label.
wilcox_lineage_plot <- ggplot(data = crispr_signif_del_lineage) +
  geom_histogram(aes(x = p, fill = "chartreuse4"), breaks = seq(0, 1, by = 0.025), color = "black", alpha = 0.7) +
  geom_histogram(aes(x = p.adj, fill = "darkslategray3"), breaks = seq(0, 1, by = 0.025), color = "black", alpha = 0.7) +
  scale_x_continuous(breaks = seq(0, 1, by = 0.05), labels = seq(0, 1, by = 0.05)) +
  scale_fill_manual(
    name = "P-values",
    values = c("chartreuse4" = "chartreuse4", "darkslategray3" = "darkslategray3"),
    # Pin the key order so each label is tied to its key explicitly instead of
    # relying on the default alphabetical ordering of the fill values.
    breaks = c("chartreuse4", "darkslategray3"),
    labels = c("Unadjusted", "BH-adjusted")
  ) +
  theme(axis.text.x = element_text(angle = 45, hjust = 1), legend.position = c(0.1, 0.85)) +
  # The x axis carries both the unadjusted and the adjusted distribution, so it
  # is labelled generically; the legend distinguishes the two.
  labs(x = "P-value", y = "Frequency")
wilcox_lineage_plot
# Wilcoxon test of Score against Mutation_Status_Nonsilent, run separately for
# each (Hugo_Symbol, group_general_lineage_name) pair with BH-adjusted p-values,
# followed by adj_signif(), a helper defined elsewhere in this file (presumably
# the source of the p.signif.adj column shown below -- confirm against its
# definition).
crispr_signif_nonsilent_lineage <- compare_means(
  Score ~ Mutation_Status_Nonsilent,
  data = crispr_data_ptmuts,
  method = "wilcox.test",
  group.by = c("Hugo_Symbol", "group_general_lineage_name"),
  p.adjust.method = "BH"
) %>%
  adj_signif()
# Sort rows by ascending unadjusted p-value.
crispr_signif_nonsilent_lineage <- crispr_signif_nonsilent_lineage[order(crispr_signif_nonsilent_lineage[["p"]]), ]
# Cache the sorted results on disk.
saveRDS(crispr_signif_nonsilent_lineage, file = "./data_munging/rds/crispr_signif_ptmuts_nonsilent_lineage.rds")
# write.table(crispr_signif_nonsilent_lineage, file = "~/Desktop/crispr_signif_ptmuts_nonsilent_lineage.csv", quote = FALSE, sep = ",", row.names = FALSE)
# Reload the cached results.
crispr_signif_nonsilent_lineage <- readRDS(file = "./data_munging/rds/crispr_signif_ptmuts_nonsilent_lineage.rds")
# Render the rows with unadjusted p < 0.01 as a styled, scrollable table.
crispr_signif_nonsilent_lineage %>%
  filter(p < 0.01) %>%
  .[, c("Hugo_Symbol", "group_general_lineage_name", "p", "p.adj", "p.format", "p.signif", "p.signif.adj")] %>%
  knitr::kable(caption = "Wilcoxon test results comparing non-silent mutant vs other cell lines by lineage, p < 0.01 (BH-adjusted p-values: * p <= 0.05, ** p <= 0.01, *** p <= 0.001, **** p <= 0.0001)") %>%
  kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive")) %>%
  scroll_box(width = "900px", height = "450px")
| Hugo_Symbol | group_general_lineage_name | p | p.adj | p.format | p.signif | p.signif.adj |
|---|---|---|---|---|---|---|
| KRAS | lung cancer | 0.0000000 | 0.0003131 | 2.5e-09 | **** | *** |
| TP53 | lung cancer | 0.0000092 | 0.5639398 | 9.2e-06 | **** | NA |
| TP53 | central nervous system cancer | 0.0000386 | 0.8671069 | 3.9e-05 | **** | NA |
| KRAS | ovarian cancer | 0.0001488 | 0.8671069 | 0.00015 | *** | NA |
| TP53 | ovarian cancer | 0.0002344 | 0.8671069 | 0.00023 | *** | NA |
| NRAS | leukemia | 0.0005316 | 0.8671069 | 0.00053 | *** | NA |
| PIK3CA | ovarian cancer | 0.0008371 | 0.8671069 | 0.00084 | *** | NA |
| DZANK1 | breast cancer | 0.0009819 | 0.8671069 | 0.00098 | *** | NA |
| PIK3CA | breast cancer | 0.0009842 | 0.8671069 | 0.00098 | *** | NA |
| KRAS | colorectal cancer | 0.0010547 | 0.8671069 | 0.00105 | ** | NA |
| TP53 | leukemia | 0.0010605 | 0.8671069 | 0.00106 | ** | NA |
| UNC45B | lung cancer | 0.0011215 | 0.8671069 | 0.00112 | ** | NA |
| GOLGA3 | uterine cancer | 0.0013666 | 0.8671069 | 0.00137 | ** | NA |
| NRAS | multiple myeloma | 0.0013756 | 0.8671069 | 0.00138 | ** | NA |
| GTF3C1 | lung cancer | 0.0013893 | 0.8671069 | 0.00139 | ** | NA |
| TICRR | uterine cancer | 0.0014839 | 0.8671069 | 0.00148 | ** | NA |
| ARID1A | pancreatic cancer | 0.0014920 | 0.8671069 | 0.00149 | ** | NA |
| NRAS | skin cancer | 0.0016637 | 0.8671069 | 0.00166 | ** | NA |
| HYOU1 | colorectal cancer | 0.0018107 | 0.8671069 | 0.00181 | ** | NA |
| USP34 | breast cancer | 0.0018820 | 0.8671069 | 0.00188 | ** | NA |
| MGA | uterine cancer | 0.0019495 | 0.8671069 | 0.00195 | ** | NA |
| TNRC6A | lung cancer | 0.0023744 | 0.8671069 | 0.00237 | ** | NA |
| SNX29 | uterine cancer | 0.0026104 | 0.8671069 | 0.00261 | ** | NA |
| KMT2B | breast cancer | 0.0026433 | 0.8671069 | 0.00264 | ** | NA |
| TP53 | kidney cancer | 0.0027562 | 0.8671069 | 0.00276 | ** | NA |
| GSPT1 | colorectal cancer | 0.0028115 | 0.8671069 | 0.00281 | ** | NA |
| AP1G2 | breast cancer | 0.0029835 | 0.8671069 | 0.00298 | ** | NA |
| ZNF264 | colorectal cancer | 0.0029879 | 0.8671069 | 0.00299 | ** | NA |
| PTEN | ovarian cancer | 0.0032141 | 0.8671069 | 0.00321 | ** | NA |
| KMT2B | ovarian cancer | 0.0032684 | 0.8671069 | 0.00327 | ** | NA |
| KIAA0586 | colorectal cancer | 0.0032838 | 0.8671069 | 0.00328 | ** | NA |
| ANK2 | colorectal cancer | 0.0033597 | 0.8671069 | 0.00336 | ** | NA |
| TGS1 | uterine cancer | 0.0035864 | 0.8671069 | 0.00359 | ** | NA |
| COL2A1 | leukemia | 0.0036054 | 0.8671069 | 0.00361 | ** | NA |
| HYDIN | colorectal cancer | 0.0036704 | 0.8671069 | 0.00367 | ** | NA |
| TP53 | colorectal cancer | 0.0036994 | 0.8671069 | 0.00370 | ** | NA |
| TP53 | skin cancer | 0.0037227 | 0.8671069 | 0.00372 | ** | NA |
| VHL | kidney cancer | 0.0037539 | 0.8671069 | 0.00375 | ** | NA |
| PAPPA | uterine cancer | 0.0037971 | 0.8671069 | 0.00380 | ** | NA |
| ZNF292 | lung cancer | 0.0038193 | 0.8671069 | 0.00382 | ** | NA |
| PTEN | central nervous system cancer | 0.0038648 | 0.8671069 | 0.00386 | ** | NA |
| NEBL | uterine cancer | 0.0039522 | 0.8671069 | 0.00395 | ** | NA |
| KRAS | stomach cancer | 0.0040001 | 0.8671069 | 0.00400 | ** | NA |
| MTMR3 | lung cancer | 0.0041192 | 0.8671069 | 0.00412 | ** | NA |
| TSKS | uterine cancer | 0.0042550 | 0.8671069 | 0.00425 | ** | NA |
| ARHGAP12 | colorectal cancer | 0.0042860 | 0.8671069 | 0.00429 | ** | NA |
| PAM | uterine cancer | 0.0043488 | 0.8671069 | 0.00435 | ** | NA |
| PDE10A | uterine cancer | 0.0043488 | 0.8671069 | 0.00435 | ** | NA |
| MAML2 | uterine cancer | 0.0044510 | 0.8671069 | 0.00445 | ** | NA |
| NCOR2 | colorectal cancer | 0.0046575 | 0.8671069 | 0.00466 | ** | NA |
| VAV3 | lung cancer | 0.0047769 | 0.8671069 | 0.00478 | ** | NA |
| SYNE1 | lung cancer | 0.0047800 | 0.8671069 | 0.00478 | ** | NA |
| NUP88 | uterine cancer | 0.0048888 | 0.8671069 | 0.00489 | ** | NA |
| LRRIQ1 | central nervous system cancer | 0.0049271 | 0.8671069 | 0.00493 | ** | NA |
| SVOPL | lung cancer | 0.0049276 | 0.8671069 | 0.00493 | ** | NA |
| PTEN | uterine cancer | 0.0049419 | 0.8671069 | 0.00494 | ** | NA |
| BRAF | skin cancer | 0.0049885 | 0.8671069 | 0.00499 | ** | NA |
| AXIN1 | ovarian cancer | 0.0050354 | 0.8671069 | 0.00504 | ** | NA |
| TNK2 | uterine cancer | 0.0050864 | 0.8671069 | 0.00509 | ** | NA |
| DIP2C | kidney cancer | 0.0052747 | 0.8671069 | 0.00527 | ** | NA |
| CTAGE15 | lung cancer | 0.0053375 | 0.8671069 | 0.00534 | ** | NA |
| LSP1 | colorectal cancer | 0.0053452 | 0.8671069 | 0.00535 | ** | NA |
| CORIN | colorectal cancer | 0.0054756 | 0.8671069 | 0.00548 | ** | NA |
| SETD2 | kidney cancer | 0.0056647 | 0.8671069 | 0.00566 | ** | NA |
| FNDC7 | colorectal cancer | 0.0057059 | 0.8671069 | 0.00571 | ** | NA |
| SKOR1 | uterine cancer | 0.0057795 | 0.8671069 | 0.00578 | ** | NA |
| PARD3B | uterine cancer | 0.0059302 | 0.8671069 | 0.00593 | ** | NA |
| HTR7 | lung cancer | 0.0059492 | 0.8671069 | 0.00595 | ** | NA |
| FNIP1 | ovarian cancer | 0.0060557 | 0.8671069 | 0.00606 | ** | NA |
| AP2A2 | breast cancer | 0.0061570 | 0.8671069 | 0.00616 | ** | NA |
| ASXL1 | colorectal cancer | 0.0061648 | 0.8671069 | 0.00616 | ** | NA |
| CCT8L2 | colorectal cancer | 0.0061648 | 0.8671069 | 0.00616 | ** | NA |
| MTMR14 | colorectal cancer | 0.0061648 | 0.8671069 | 0.00616 | ** | NA |
| TTC32 | uterine cancer | 0.0062881 | 0.8671069 | 0.00629 | ** | NA |
| PIK3R1 | ovarian cancer | 0.0062895 | 0.8671069 | 0.00629 | ** | NA |
| NR4A1 | lung cancer | 0.0064934 | 0.8671069 | 0.00649 | ** | NA |
| SLC22A17 | lung cancer | 0.0064934 | 0.8671069 | 0.00649 | ** | NA |
| FAM208B | colorectal cancer | 0.0065065 | 0.8671069 | 0.00651 | ** | NA |
| ROCK1 | colorectal cancer | 0.0066106 | 0.8671069 | 0.00661 | ** | NA |
| ZNF521 | colorectal cancer | 0.0066975 | 0.8671069 | 0.00670 | ** | NA |
| RFWD3 | lung cancer | 0.0067488 | 0.8671069 | 0.00675 | ** | NA |
| TYRP1 | lung cancer | 0.0067488 | 0.8671069 | 0.00675 | ** | NA |
| ADRBK2 | uterine cancer | 0.0067676 | 0.8671069 | 0.00677 | ** | NA |
| DVL2 | uterine cancer | 0.0067676 | 0.8671069 | 0.00677 | ** | NA |
| EXOC8 | uterine cancer | 0.0067676 | 0.8671069 | 0.00677 | ** | NA |
| ITSN1 | colorectal cancer | 0.0069403 | 0.8671069 | 0.00694 | ** | NA |
| ZNF292 | colorectal cancer | 0.0069403 | 0.8671069 | 0.00694 | ** | NA |
| MIS18BP1 | colorectal cancer | 0.0070098 | 0.8671069 | 0.00701 | ** | NA |
| OR5M8 | colorectal cancer | 0.0070184 | 0.8671069 | 0.00702 | ** | NA |
| LILRB2 | leukemia | 0.0070255 | 0.8671069 | 0.00703 | ** | NA |
| PIK3CA | colorectal cancer | 0.0070506 | 0.8671069 | 0.00705 | ** | NA |
| DDX11 | lung cancer | 0.0070952 | 0.8671069 | 0.00710 | ** | NA |
| KMT2C | multiple myeloma | 0.0071541 | 0.8671069 | 0.00715 | ** | NA |
| MYH6 | uterine cancer | 0.0071786 | 0.8671069 | 0.00718 | ** | NA |
| ARID1B | ovarian cancer | 0.0072055 | 0.8671069 | 0.00721 | ** | NA |
| SURF6 | uterine cancer | 0.0073058 | 0.8671069 | 0.00731 | ** | NA |
| C1orf86 | leukemia | 0.0073710 | 0.8671069 | 0.00737 | ** | NA |
| TEX10 | ovarian cancer | 0.0075601 | 0.8671069 | 0.00756 | ** | NA |
| ALPK3 | ovarian cancer | 0.0076300 | 0.8671069 | 0.00763 | ** | NA |
| MYOM3 | lung cancer | 0.0076435 | 0.8671069 | 0.00764 | ** | NA |
| PNPLA5 | colorectal cancer | 0.0077740 | 0.8671069 | 0.00777 | ** | NA |
| PTGFRN | lung cancer | 0.0078691 | 0.8671069 | 0.00787 | ** | NA |
| PCDH10 | uterine cancer | 0.0078925 | 0.8671069 | 0.00789 | ** | NA |
| ANKRD23 | colorectal cancer | 0.0078965 | 0.8671069 | 0.00790 | ** | NA |
| HERC1 | skin cancer | 0.0079501 | 0.8671069 | 0.00795 | ** | NA |
| C12orf4 | uterine cancer | 0.0080215 | 0.8671069 | 0.00802 | ** | NA |
| EGFLAM | lung cancer | 0.0081906 | 0.8671069 | 0.00819 | ** | NA |
| SPRED1 | lung cancer | 0.0084512 | 0.8671069 | 0.00845 | ** | NA |
| SBF2 | lung cancer | 0.0084541 | 0.8671069 | 0.00845 | ** | NA |
| EFCAB5 | ovarian cancer | 0.0084624 | 0.8671069 | 0.00846 | ** | NA |
| CSNK1D | uterine cancer | 0.0085754 | 0.8671069 | 0.00858 | ** | NA |
| FAM110A | uterine cancer | 0.0085754 | 0.8671069 | 0.00858 | ** | NA |
| MICAL1 | uterine cancer | 0.0085754 | 0.8671069 | 0.00858 | ** | NA |
| MORF4L1 | uterine cancer | 0.0085754 | 0.8671069 | 0.00858 | ** | NA |
| RIOK2 | uterine cancer | 0.0085754 | 0.8671069 | 0.00858 | ** | NA |
| GAA | colorectal cancer | 0.0085960 | 0.8671069 | 0.00860 | ** | NA |
| KIAA1107 | colorectal cancer | 0.0085960 | 0.8671069 | 0.00860 | ** | NA |
| ZNF536 | skin cancer | 0.0086987 | 0.8671069 | 0.00870 | ** | NA |
| RPS6KA2 | uterine cancer | 0.0087779 | 0.8671069 | 0.00878 | ** | NA |
| SOGA1 | lung cancer | 0.0088239 | 0.8671069 | 0.00882 | ** | NA |
| TPR | lung cancer | 0.0088831 | 0.8671069 | 0.00888 | ** | NA |
| TECRL | lung cancer | 0.0089331 | 0.8671069 | 0.00893 | ** | NA |
| SVEP1 | skin cancer | 0.0090238 | 0.8671069 | 0.00902 | ** | NA |
| DPYSL5 | uterine cancer | 0.0091517 | 0.8671069 | 0.00915 | ** | NA |
| FAM189A1 | uterine cancer | 0.0091517 | 0.8671069 | 0.00915 | ** | NA |
| HSPB2 | uterine cancer | 0.0091517 | 0.8671069 | 0.00915 | ** | NA |
| IRX6 | uterine cancer | 0.0091517 | 0.8671069 | 0.00915 | ** | NA |
| XKR9 | uterine cancer | 0.0091517 | 0.8671069 | 0.00915 | ** | NA |
| KIAA0922 | lung cancer | 0.0091532 | 0.8671069 | 0.00915 | ** | NA |
| STK17B | lung cancer | 0.0092125 | 0.8671069 | 0.00921 | ** | NA |
| TTF1 | lung cancer | 0.0092125 | 0.8671069 | 0.00921 | ** | NA |
| TBC1D10B | uterine cancer | 0.0092568 | 0.8671069 | 0.00926 | ** | NA |
| DNAJC8 | uterine cancer | 0.0092717 | 0.8671069 | 0.00927 | ** | NA |
| PKDCC | uterine cancer | 0.0092717 | 0.8671069 | 0.00927 | ** | NA |
| TCF3 | uterine cancer | 0.0092717 | 0.8671069 | 0.00927 | ** | NA |
| ZNF207 | uterine cancer | 0.0093098 | 0.8671069 | 0.00931 | ** | NA |
| RASIP1 | colorectal cancer | 0.0093199 | 0.8671069 | 0.00932 | ** | NA |
| AP5B1 | uterine cancer | 0.0093215 | 0.8671069 | 0.00932 | ** | NA |
| RAD50 | leukemia | 0.0094090 | 0.8671069 | 0.00941 | ** | NA |
| KRT17 | breast cancer | 0.0094667 | 0.8671069 | 0.00947 | ** | NA |
| SCFD1 | colorectal cancer | 0.0094882 | 0.8671069 | 0.00949 | ** | NA |
| DNAH5 | colorectal cancer | 0.0095928 | 0.8671069 | 0.00959 | ** | NA |
| INTS4 | uterine cancer | 0.0096407 | 0.8671069 | 0.00964 | ** | NA |
| TTF1 | leukemia | 0.0097690 | 0.8671069 | 0.00977 | ** | NA |
| LRP4 | uterine cancer | 0.0099566 | 0.8671069 | 0.00996 | ** | NA |
| ZNF469 | uterine cancer | 0.0099581 | 0.8671069 | 0.00996 | ** | NA |
| IPO4 | uterine cancer | 0.0099688 | 0.8671069 | 0.00997 | ** | NA |
# Overlaid histograms of the unadjusted (`p`) and adjusted (`p.adj`) p-values
# held in `crispr_signif_nonsilent_lineage`, binned in steps of 0.025 on [0, 1].
# NOTE(review): the object name suggests these are the per-lineage Wilcoxon
# results for non-silent mutations; its construction is not shown here.
#
# The fill aesthetic is keyed by what each layer shows ("Unadjusted" /
# "BH-adjusted") and the legend order is pinned with `breaks`, so the legend
# text no longer depends on the colour names happening to sort in the same
# order as a positional `labels` vector.
wilcox_lineage_nonsilent_plot <- ggplot(data = crispr_signif_nonsilent_lineage) +
  geom_histogram(
    aes(x = p, fill = "Unadjusted"),
    breaks = seq(0, 1, by = 0.025),
    color = "black",
    alpha = 0.7
  ) +
  geom_histogram(
    aes(x = p.adj, fill = "BH-adjusted"),
    breaks = seq(0, 1, by = 0.025),
    color = "black",
    alpha = 0.7
  ) +
  scale_x_continuous(
    breaks = seq(0, 1, by = 0.05),
    labels = seq(0, 1, by = 0.05)
  ) +
  scale_fill_manual(
    name = "P-values",
    values = c("Unadjusted" = "chartreuse4", "BH-adjusted" = "darkslategray3"),
    breaks = c("Unadjusted", "BH-adjusted")
  ) +
  theme(
    axis.text.x = element_text(angle = 45, hjust = 1),
    legend.position = c(0.1, 0.85)
  ) +
  # The x-axis is shared by the unadjusted and the adjusted layer, so it is
  # labelled neutrally; the legend tells the two distributions apart.
  labs(x = "P-value", y = "Frequency")
wilcox_lineage_nonsilent_plot
# Wilcoxon tests of Score against deleterious/missense mutation status, run
# separately for every gene x lineage group in `crispr_data_ptmuts`, with
# BH-adjusted p-values. `adj_signif()` is a helper defined elsewhere in this
# file (presumably it adds the `p.signif.adj` column shown below -- confirm).
# Rows are then sorted by ascending unadjusted p-value.
crispr_signif_delmis_lineage <- compare_means(
  Score ~ Mutation_Status_DeleteriousMissense,
  data = crispr_data_ptmuts,
  group.by = c("Hugo_Symbol", "group_general_lineage_name"),
  method = "wilcox.test",
  p.adjust.method = "BH"
) %>%
  adj_signif() %>%
  .[order(.$p), ]

# Cache the sorted results on disk, then reload them from that same file.
saveRDS(
  crispr_signif_delmis_lineage,
  file = "./data_munging/rds/crispr_signif_ptmuts_deleteriousmissense_lineage.rds"
)
# Optional CSV export, left disabled:
# write.table(crispr_signif_delmis_lineage, file = "~/Desktop/crispr_signif_ptmuts_deleteriousmissense_lineage.csv", quote = FALSE, sep = ",", row.names = FALSE)
crispr_signif_delmis_lineage <- readRDS(
  file = "./data_munging/rds/crispr_signif_ptmuts_deleteriousmissense_lineage.rds"
)

# Scrollable table of the comparisons with unadjusted p < 0.01.
crispr_signif_delmis_lineage %>%
  filter(p < 0.01) %>%
  .[, c("Hugo_Symbol", "group_general_lineage_name", "p", "p.adj", "p.format", "p.signif", "p.signif.adj")] %>%
  knitr::kable(
    caption = "Wilcoxon test results comparing deleterious and missense mutant vs other cell lines by lineage, p < 0.01 (BH-adjusted p-values: * p <= 0.05, ** p <= 0.01, *** p <= 0.001, **** p <= 0.0001)"
  ) %>%
  kable_styling(
    bootstrap_options = c("striped", "hover", "condensed", "responsive")
  ) %>%
  scroll_box(width = "900px", height = "450px")
| Hugo_Symbol | group_general_lineage_name | p | p.adj | p.format | p.signif | p.signif.adj |
|---|---|---|---|---|---|---|
| KRAS | lung cancer | 0.0000000 | 0.0003122 | 2.5e-09 | **** | *** |
| TP53 | lung cancer | 0.0000092 | 0.5624195 | 9.2e-06 | **** | NA |
| KRAS | ovarian cancer | 0.0001488 | 0.8669363 | 0.00015 | *** | NA |
| TP53 | central nervous system cancer | 0.0001651 | 0.8669363 | 0.00017 | *** | NA |
| TP53 | ovarian cancer | 0.0002344 | 0.8669363 | 0.00023 | *** | NA |
| NRAS | leukemia | 0.0005316 | 0.8669363 | 0.00053 | *** | NA |
| PIK3CA | ovarian cancer | 0.0008371 | 0.8669363 | 0.00084 | *** | NA |
| DZANK1 | breast cancer | 0.0009819 | 0.8669363 | 0.00098 | *** | NA |
| PIK3CA | breast cancer | 0.0009842 | 0.8669363 | 0.00098 | *** | NA |
| KRAS | colorectal cancer | 0.0010547 | 0.8669363 | 0.00105 | ** | NA |
| TP53 | leukemia | 0.0010605 | 0.8669363 | 0.00106 | ** | NA |
| UNC45B | lung cancer | 0.0011215 | 0.8669363 | 0.00112 | ** | NA |
| GOLGA3 | uterine cancer | 0.0013666 | 0.8669363 | 0.00137 | ** | NA |
| NRAS | multiple myeloma | 0.0013756 | 0.8669363 | 0.00138 | ** | NA |
| GTF3C1 | lung cancer | 0.0013893 | 0.8669363 | 0.00139 | ** | NA |
| TICRR | uterine cancer | 0.0014839 | 0.8669363 | 0.00148 | ** | NA |
| ARID1A | pancreatic cancer | 0.0014920 | 0.8669363 | 0.00149 | ** | NA |
| NRAS | skin cancer | 0.0016637 | 0.8669363 | 0.00166 | ** | NA |
| HYOU1 | colorectal cancer | 0.0018107 | 0.8669363 | 0.00181 | ** | NA |
| USP34 | breast cancer | 0.0018820 | 0.8669363 | 0.00188 | ** | NA |
| MGA | uterine cancer | 0.0019495 | 0.8669363 | 0.00195 | ** | NA |
| TNRC6A | lung cancer | 0.0023744 | 0.8669363 | 0.00237 | ** | NA |
| SNX29 | uterine cancer | 0.0026104 | 0.8669363 | 0.00261 | ** | NA |
| KMT2B | breast cancer | 0.0026433 | 0.8669363 | 0.00264 | ** | NA |
| TP53 | kidney cancer | 0.0027562 | 0.8669363 | 0.00276 | ** | NA |
| GSPT1 | colorectal cancer | 0.0028115 | 0.8669363 | 0.00281 | ** | NA |
| AP1G2 | breast cancer | 0.0029835 | 0.8669363 | 0.00298 | ** | NA |
| ZNF264 | colorectal cancer | 0.0029879 | 0.8669363 | 0.00299 | ** | NA |
| PTEN | ovarian cancer | 0.0032141 | 0.8669363 | 0.00321 | ** | NA |
| KMT2B | ovarian cancer | 0.0032684 | 0.8669363 | 0.00327 | ** | NA |
| KIAA0586 | colorectal cancer | 0.0032838 | 0.8669363 | 0.00328 | ** | NA |
| ANK2 | colorectal cancer | 0.0033597 | 0.8669363 | 0.00336 | ** | NA |
| TGS1 | uterine cancer | 0.0035864 | 0.8669363 | 0.00359 | ** | NA |
| COL2A1 | leukemia | 0.0036054 | 0.8669363 | 0.00361 | ** | NA |
| HYDIN | colorectal cancer | 0.0036704 | 0.8669363 | 0.00367 | ** | NA |
| TP53 | colorectal cancer | 0.0036994 | 0.8669363 | 0.00370 | ** | NA |
| TP53 | skin cancer | 0.0037227 | 0.8669363 | 0.00372 | ** | NA |
| VHL | kidney cancer | 0.0037539 | 0.8669363 | 0.00375 | ** | NA |
| PAPPA | uterine cancer | 0.0037971 | 0.8669363 | 0.00380 | ** | NA |
| ZNF292 | lung cancer | 0.0038193 | 0.8669363 | 0.00382 | ** | NA |
| PTEN | central nervous system cancer | 0.0038648 | 0.8669363 | 0.00386 | ** | NA |
| NEBL | uterine cancer | 0.0039522 | 0.8669363 | 0.00395 | ** | NA |
| KRAS | stomach cancer | 0.0040001 | 0.8669363 | 0.00400 | ** | NA |
| MTMR3 | lung cancer | 0.0041192 | 0.8669363 | 0.00412 | ** | NA |
| TSKS | uterine cancer | 0.0042550 | 0.8669363 | 0.00425 | ** | NA |
| ARHGAP12 | colorectal cancer | 0.0042860 | 0.8669363 | 0.00429 | ** | NA |
| PAM | uterine cancer | 0.0043488 | 0.8669363 | 0.00435 | ** | NA |
| PDE10A | uterine cancer | 0.0043488 | 0.8669363 | 0.00435 | ** | NA |
| NCOR2 | colorectal cancer | 0.0046575 | 0.8669363 | 0.00466 | ** | NA |
| VAV3 | lung cancer | 0.0047769 | 0.8669363 | 0.00478 | ** | NA |
| SYNE1 | lung cancer | 0.0047800 | 0.8669363 | 0.00478 | ** | NA |
| NUP88 | uterine cancer | 0.0048888 | 0.8669363 | 0.00489 | ** | NA |
| LRRIQ1 | central nervous system cancer | 0.0049271 | 0.8669363 | 0.00493 | ** | NA |
| SVOPL | lung cancer | 0.0049276 | 0.8669363 | 0.00493 | ** | NA |
| PTEN | uterine cancer | 0.0049419 | 0.8669363 | 0.00494 | ** | NA |
| BRAF | skin cancer | 0.0049885 | 0.8669363 | 0.00499 | ** | NA |
| AXIN1 | ovarian cancer | 0.0050354 | 0.8669363 | 0.00504 | ** | NA |
| TNK2 | uterine cancer | 0.0050864 | 0.8669363 | 0.00509 | ** | NA |
| DIP2C | kidney cancer | 0.0052747 | 0.8669363 | 0.00527 | ** | NA |
| CTAGE15 | lung cancer | 0.0053375 | 0.8669363 | 0.00534 | ** | NA |
| LSP1 | colorectal cancer | 0.0053452 | 0.8669363 | 0.00535 | ** | NA |
| CORIN | colorectal cancer | 0.0054756 | 0.8669363 | 0.00548 | ** | NA |
| SETD2 | kidney cancer | 0.0056647 | 0.8669363 | 0.00566 | ** | NA |
| FNDC7 | colorectal cancer | 0.0057059 | 0.8669363 | 0.00571 | ** | NA |
| SKOR1 | uterine cancer | 0.0057795 | 0.8669363 | 0.00578 | ** | NA |
| PARD3B | uterine cancer | 0.0059302 | 0.8669363 | 0.00593 | ** | NA |
| HTR7 | lung cancer | 0.0059492 | 0.8669363 | 0.00595 | ** | NA |
| FNIP1 | ovarian cancer | 0.0060557 | 0.8669363 | 0.00606 | ** | NA |
| AP2A2 | breast cancer | 0.0061570 | 0.8669363 | 0.00616 | ** | NA |
| ASXL1 | colorectal cancer | 0.0061648 | 0.8669363 | 0.00616 | ** | NA |
| CCT8L2 | colorectal cancer | 0.0061648 | 0.8669363 | 0.00616 | ** | NA |
| MTMR14 | colorectal cancer | 0.0061648 | 0.8669363 | 0.00616 | ** | NA |
| TTC32 | uterine cancer | 0.0062881 | 0.8669363 | 0.00629 | ** | NA |
| NR4A1 | lung cancer | 0.0064934 | 0.8669363 | 0.00649 | ** | NA |
| SLC22A17 | lung cancer | 0.0064934 | 0.8669363 | 0.00649 | ** | NA |
| FAM208B | colorectal cancer | 0.0065065 | 0.8669363 | 0.00651 | ** | NA |
| ROCK1 | colorectal cancer | 0.0066106 | 0.8669363 | 0.00661 | ** | NA |
| ZNF521 | colorectal cancer | 0.0066975 | 0.8669363 | 0.00670 | ** | NA |
| RFWD3 | lung cancer | 0.0067488 | 0.8669363 | 0.00675 | ** | NA |
| TYRP1 | lung cancer | 0.0067488 | 0.8669363 | 0.00675 | ** | NA |
| ADRBK2 | uterine cancer | 0.0067676 | 0.8669363 | 0.00677 | ** | NA |
| DVL2 | uterine cancer | 0.0067676 | 0.8669363 | 0.00677 | ** | NA |
| EXOC8 | uterine cancer | 0.0067676 | 0.8669363 | 0.00677 | ** | NA |
| ITSN1 | colorectal cancer | 0.0069403 | 0.8669363 | 0.00694 | ** | NA |
| ZNF292 | colorectal cancer | 0.0069403 | 0.8669363 | 0.00694 | ** | NA |
| MIS18BP1 | colorectal cancer | 0.0070098 | 0.8669363 | 0.00701 | ** | NA |
| OR5M8 | colorectal cancer | 0.0070184 | 0.8669363 | 0.00702 | ** | NA |
| PIK3CA | colorectal cancer | 0.0070506 | 0.8669363 | 0.00705 | ** | NA |
| DDX11 | lung cancer | 0.0070952 | 0.8669363 | 0.00710 | ** | NA |
| KMT2C | multiple myeloma | 0.0071541 | 0.8669363 | 0.00715 | ** | NA |
| MYH6 | uterine cancer | 0.0071786 | 0.8669363 | 0.00718 | ** | NA |
| ARID1B | ovarian cancer | 0.0072055 | 0.8669363 | 0.00721 | ** | NA |
| SURF6 | uterine cancer | 0.0073058 | 0.8669363 | 0.00731 | ** | NA |
| C1orf86 | leukemia | 0.0073710 | 0.8669363 | 0.00737 | ** | NA |
| TEX10 | ovarian cancer | 0.0075601 | 0.8669363 | 0.00756 | ** | NA |
| ALPK3 | ovarian cancer | 0.0076300 | 0.8669363 | 0.00763 | ** | NA |
| MYOM3 | lung cancer | 0.0076435 | 0.8669363 | 0.00764 | ** | NA |
| PNPLA5 | colorectal cancer | 0.0077740 | 0.8669363 | 0.00777 | ** | NA |
| PTGFRN | lung cancer | 0.0078691 | 0.8669363 | 0.00787 | ** | NA |
| PCDH10 | uterine cancer | 0.0078925 | 0.8669363 | 0.00789 | ** | NA |
| ANKRD23 | colorectal cancer | 0.0078965 | 0.8669363 | 0.00790 | ** | NA |
| HERC1 | skin cancer | 0.0079501 | 0.8669363 | 0.00795 | ** | NA |
| C12orf4 | uterine cancer | 0.0080215 | 0.8669363 | 0.00802 | ** | NA |
| EGFLAM | lung cancer | 0.0081906 | 0.8669363 | 0.00819 | ** | NA |
| SPRED1 | lung cancer | 0.0084512 | 0.8669363 | 0.00845 | ** | NA |
| SBF2 | lung cancer | 0.0084541 | 0.8669363 | 0.00845 | ** | NA |
| EFCAB5 | ovarian cancer | 0.0084624 | 0.8669363 | 0.00846 | ** | NA |
| CSNK1D | uterine cancer | 0.0085754 | 0.8669363 | 0.00858 | ** | NA |
| FAM110A | uterine cancer | 0.0085754 | 0.8669363 | 0.00858 | ** | NA |
| MICAL1 | uterine cancer | 0.0085754 | 0.8669363 | 0.00858 | ** | NA |
| MORF4L1 | uterine cancer | 0.0085754 | 0.8669363 | 0.00858 | ** | NA |
| RIOK2 | uterine cancer | 0.0085754 | 0.8669363 | 0.00858 | ** | NA |
| GAA | colorectal cancer | 0.0085960 | 0.8669363 | 0.00860 | ** | NA |
| KIAA1107 | colorectal cancer | 0.0085960 | 0.8669363 | 0.00860 | ** | NA |
| ZNF536 | skin cancer | 0.0086987 | 0.8669363 | 0.00870 | ** | NA |
| RPS6KA2 | uterine cancer | 0.0087779 | 0.8669363 | 0.00878 | ** | NA |
| SOGA1 | lung cancer | 0.0088239 | 0.8669363 | 0.00882 | ** | NA |
| TPR | lung cancer | 0.0088831 | 0.8669363 | 0.00888 | ** | NA |
| SHANK1 | central nervous system cancer | 0.0088878 | 0.8669363 | 0.00889 | ** | NA |
| TECRL | lung cancer | 0.0089331 | 0.8669363 | 0.00893 | ** | NA |
| SVEP1 | skin cancer | 0.0090238 | 0.8669363 | 0.00902 | ** | NA |
| DPYSL5 | uterine cancer | 0.0091517 | 0.8669363 | 0.00915 | ** | NA |
| FAM189A1 | uterine cancer | 0.0091517 | 0.8669363 | 0.00915 | ** | NA |
| IRX6 | uterine cancer | 0.0091517 | 0.8669363 | 0.00915 | ** | NA |
| XKR9 | uterine cancer | 0.0091517 | 0.8669363 | 0.00915 | ** | NA |
| KIAA0922 | lung cancer | 0.0091532 | 0.8669363 | 0.00915 | ** | NA |
| TTF1 | lung cancer | 0.0092125 | 0.8669363 | 0.00921 | ** | NA |
| TBC1D10B | uterine cancer | 0.0092568 | 0.8669363 | 0.00926 | ** | NA |
| DNAJC8 | uterine cancer | 0.0092717 | 0.8669363 | 0.00927 | ** | NA |
| PKDCC | uterine cancer | 0.0092717 | 0.8669363 | 0.00927 | ** | NA |
| TCF3 | uterine cancer | 0.0092717 | 0.8669363 | 0.00927 | ** | NA |
| ZNF207 | uterine cancer | 0.0093098 | 0.8669363 | 0.00931 | ** | NA |
| RASIP1 | colorectal cancer | 0.0093199 | 0.8669363 | 0.00932 | ** | NA |
| AP5B1 | uterine cancer | 0.0093215 | 0.8669363 | 0.00932 | ** | NA |
| RAD50 | leukemia | 0.0094090 | 0.8669363 | 0.00941 | ** | NA |
| KRT17 | breast cancer | 0.0094667 | 0.8669363 | 0.00947 | ** | NA |
| SCFD1 | colorectal cancer | 0.0094882 | 0.8669363 | 0.00949 | ** | NA |
| DNAH5 | colorectal cancer | 0.0095928 | 0.8669363 | 0.00959 | ** | NA |
| INTS4 | uterine cancer | 0.0096407 | 0.8669363 | 0.00964 | ** | NA |
| TTF1 | leukemia | 0.0097690 | 0.8669363 | 0.00977 | ** | NA |
| LRP4 | uterine cancer | 0.0099566 | 0.8669363 | 0.00996 | ** | NA |
| ZNF469 | uterine cancer | 0.0099581 | 0.8669363 | 0.00996 | ** | NA |
| IPO4 | uterine cancer | 0.0099688 | 0.8669363 | 0.00997 | ** | NA |
# Overlaid histograms of the unadjusted (`p`) and BH-adjusted (`p.adj`)
# p-values in `crispr_signif_delmis_lineage`, binned in steps of 0.025 on
# [0, 1].
#
# The fill aesthetic is keyed by what each layer shows ("Unadjusted" /
# "BH-adjusted") and the legend order is pinned with `breaks`, so the legend
# text no longer depends on the colour names happening to sort in the same
# order as a positional `labels` vector.
wilcox_gene_delmis_lineage_plot <- ggplot(data = crispr_signif_delmis_lineage) +
  geom_histogram(
    aes(x = p, fill = "Unadjusted"),
    breaks = seq(0, 1, by = 0.025),
    color = "black",
    alpha = 0.7
  ) +
  geom_histogram(
    aes(x = p.adj, fill = "BH-adjusted"),
    breaks = seq(0, 1, by = 0.025),
    color = "black",
    alpha = 0.7
  ) +
  scale_x_continuous(
    breaks = seq(0, 1, by = 0.05),
    labels = seq(0, 1, by = 0.05)
  ) +
  scale_fill_manual(
    name = "P-values",
    values = c("Unadjusted" = "chartreuse4", "BH-adjusted" = "darkslategray3"),
    breaks = c("Unadjusted", "BH-adjusted")
  ) +
  theme(
    axis.text.x = element_text(angle = 45, hjust = 1),
    legend.position = c(0.1, 0.85)
  ) +
  # The x-axis is shared by the unadjusted and the adjusted layer, so it is
  # labelled neutrally; the legend tells the two distributions apart.
  labs(x = "P-value", y = "Frequency")
wilcox_gene_delmis_lineage_plot
# Per-gene Wilcoxon tests of Score against deleterious mutation status over
# `crispr_data`, with BH-adjusted p-values. `adj_signif()` is a helper defined
# elsewhere in this file (presumably it adds the `p.signif.adj` column shown
# below -- confirm). Rows are then sorted by ascending unadjusted p-value.
crispr_signif_allmut_del <- compare_means(
  Score ~ Mutation_Status_Deleterious,
  data = crispr_data,
  group.by = c("Hugo_Symbol"),
  method = "wilcox.test",
  p.adjust.method = "BH"
) %>%
  adj_signif() %>%
  .[order(.$p), ]

# Cache the sorted results on disk, then reload them from that same file.
saveRDS(
  crispr_signif_allmut_del,
  file = "./data_munging/rds/crispr_signif_allmut_deleterious_gene.rds"
)
# Optional CSV export, left disabled:
# write.table(crispr_signif_allmut_del, file = "~/Desktop/crispr_signif_allmut_deleterious_gene.csv", quote = FALSE, sep = ",", row.names = FALSE)
crispr_signif_allmut_del <- readRDS(
  file = "./data_munging/rds/crispr_signif_allmut_deleterious_gene.rds"
)

# Scrollable table of the genes with unadjusted p < 0.01.
crispr_signif_allmut_del %>%
  filter(p < 0.01) %>%
  .[, c("Hugo_Symbol", "p", "p.adj", "p.format", "p.signif", "p.signif.adj")] %>%
  knitr::kable(
    caption = "Wilcoxon test results comparing deleterious mutant vs other cell lines, p < 0.01 (BH-adjusted p-values: * p <= 0.05, ** p <= 0.01, *** p <= 0.001, **** p <= 0.0001)"
  ) %>%
  kable_styling(
    bootstrap_options = c("striped", "hover", "condensed", "responsive")
  ) %>%
  scroll_box(width = "900px", height = "450px")
| Hugo_Symbol | p | p.adj | p.format | p.signif | p.signif.adj |
|---|---|---|---|---|---|
| PTEN | 0.0000000 | 0.0000000 | 2.9e-13 | **** | **** |
| TP53 | 0.0000002 | 0.0016540 | 2.5e-07 | **** | ** |
| ARID1A | 0.0000011 | 0.0047700 | 1.1e-06 | **** | ** |
| VHL | 0.0000016 | 0.0053734 | 1.6e-06 | **** | ** |
| UTP20 | 0.0000301 | 0.0802533 | 3.0e-05 | **** | NA |
| KRT19 | 0.0000705 | 0.1566873 | 7.0e-05 | **** | NA |
| ZC3H13 | 0.0009557 | 0.7440112 | 0.00096 | *** | NA |
| ARID1B | 0.0010200 | 0.7440112 | 0.00102 | ** | NA |
| ABCD4 | 0.0012457 | 0.7440112 | 0.00125 | ** | NA |
| TCERG1 | 0.0012735 | 0.7440112 | 0.00127 | ** | NA |
| NPSR1 | 0.0013941 | 0.7440112 | 0.00139 | ** | NA |
| KRTAP10-6 | 0.0015323 | 0.7440112 | 0.00153 | ** | NA |
| PLXNA1 | 0.0016291 | 0.7440112 | 0.00163 | ** | NA |
| LILRB2 | 0.0016968 | 0.7440112 | 0.00170 | ** | NA |
| RNF208 | 0.0019385 | 0.7440112 | 0.00194 | ** | NA |
| AJAP1 | 0.0021018 | 0.7440112 | 0.00210 | ** | NA |
| HAX1 | 0.0022315 | 0.7440112 | 0.00223 | ** | NA |
| TIMD4 | 0.0022527 | 0.7440112 | 0.00225 | ** | NA |
| PIGR | 0.0028924 | 0.7440112 | 0.00289 | ** | NA |
| TP53BP2 | 0.0029507 | 0.7440112 | 0.00295 | ** | NA |
| MSH6 | 0.0031226 | 0.7440112 | 0.00312 | ** | NA |
| KLHL17 | 0.0038240 | 0.7440112 | 0.00382 | ** | NA |
| TUBGCP5 | 0.0038993 | 0.7440112 | 0.00390 | ** | NA |
| FMN1 | 0.0039763 | 0.7440112 | 0.00398 | ** | NA |
| AKAP13 | 0.0040426 | 0.7440112 | 0.00404 | ** | NA |
| UGT2A3 | 0.0041569 | 0.7440112 | 0.00416 | ** | NA |
| BRD4 | 0.0042237 | 0.7440112 | 0.00422 | ** | NA |
| RB1 | 0.0042661 | 0.7440112 | 0.00427 | ** | NA |
| NIPBL | 0.0042726 | 0.7440112 | 0.00427 | ** | NA |
| ATAD5 | 0.0045229 | 0.7440112 | 0.00452 | ** | NA |
| CR1 | 0.0047742 | 0.7440112 | 0.00477 | ** | NA |
| PPIL4 | 0.0047926 | 0.7440112 | 0.00479 | ** | NA |
| RINT1 | 0.0048906 | 0.7440112 | 0.00489 | ** | NA |
| SIN3A | 0.0049114 | 0.7440112 | 0.00491 | ** | NA |
| PIK3R1 | 0.0052016 | 0.7440112 | 0.00520 | ** | NA |
| TPR | 0.0052404 | 0.7440112 | 0.00524 | ** | NA |
| DLD | 0.0053697 | 0.7440112 | 0.00537 | ** | NA |
| PTPRR | 0.0055220 | 0.7440112 | 0.00552 | ** | NA |
| CSTF3 | 0.0055891 | 0.7440112 | 0.00559 | ** | NA |
| OLFML2B | 0.0058315 | 0.7440112 | 0.00583 | ** | NA |
| GSTM5 | 0.0058384 | 0.7440112 | 0.00584 | ** | NA |
| NOC4L | 0.0061581 | 0.7440112 | 0.00616 | ** | NA |
| KIAA1107 | 0.0061991 | 0.7440112 | 0.00620 | ** | NA |
| RTN3 | 0.0062371 | 0.7440112 | 0.00624 | ** | NA |
| PLOD3 | 0.0062880 | 0.7440112 | 0.00629 | ** | NA |
| ZNF439 | 0.0063457 | 0.7440112 | 0.00635 | ** | NA |
| DMKN | 0.0063484 | 0.7440112 | 0.00635 | ** | NA |
| EDNRB | 0.0063517 | 0.7440112 | 0.00635 | ** | NA |
| UTP3 | 0.0065359 | 0.7440112 | 0.00654 | ** | NA |
| IQCH | 0.0067700 | 0.7440112 | 0.00677 | ** | NA |
| ST7 | 0.0068047 | 0.7440112 | 0.00680 | ** | NA |
| ZNF177 | 0.0068619 | 0.7440112 | 0.00686 | ** | NA |
| ENAH | 0.0069381 | 0.7440112 | 0.00694 | ** | NA |
| RTF1 | 0.0070416 | 0.7440112 | 0.00704 | ** | NA |
| UTS2B | 0.0072533 | 0.7440112 | 0.00725 | ** | NA |
| TRIM13 | 0.0073672 | 0.7440112 | 0.00737 | ** | NA |
| KLHL1 | 0.0073975 | 0.7440112 | 0.00740 | ** | NA |
| AMER2 | 0.0076915 | 0.7440112 | 0.00769 | ** | NA |
| DMWD | 0.0077073 | 0.7440112 | 0.00771 | ** | NA |
| ATG2A | 0.0077283 | 0.7440112 | 0.00773 | ** | NA |
| NPAT | 0.0078418 | 0.7440112 | 0.00784 | ** | NA |
| SMARCB1 | 0.0081997 | 0.7440112 | 0.00820 | ** | NA |
| PAQR3 | 0.0082276 | 0.7440112 | 0.00823 | ** | NA |
| SF3B2 | 0.0084940 | 0.7440112 | 0.00849 | ** | NA |
| NGRN | 0.0085338 | 0.7440112 | 0.00853 | ** | NA |
| SKOR1 | 0.0085338 | 0.7440112 | 0.00853 | ** | NA |
| PHLPP1 | 0.0085991 | 0.7440112 | 0.00860 | ** | NA |
| ZBTB25 | 0.0086381 | 0.7440112 | 0.00864 | ** | NA |
| ZFR | 0.0087353 | 0.7440112 | 0.00874 | ** | NA |
| ZNF124 | 0.0089578 | 0.7440112 | 0.00896 | ** | NA |
| TOP1MT | 0.0089680 | 0.7440112 | 0.00897 | ** | NA |
| NUP98 | 0.0091458 | 0.7440112 | 0.00915 | ** | NA |
| THEMIS | 0.0092069 | 0.7440112 | 0.00921 | ** | NA |
| ESYT2 | 0.0092528 | 0.7440112 | 0.00925 | ** | NA |
| DZANK1 | 0.0092793 | 0.7440112 | 0.00928 | ** | NA |
| SRPR | 0.0092882 | 0.7440112 | 0.00929 | ** | NA |
| PAX5 | 0.0093106 | 0.7440112 | 0.00931 | ** | NA |
| ST5 | 0.0094802 | 0.7440112 | 0.00948 | ** | NA |
| TTC22 | 0.0094965 | 0.7440112 | 0.00950 | ** | NA |
| BUB1 | 0.0095144 | 0.7440112 | 0.00951 | ** | NA |
| RIMBP3 | 0.0096462 | 0.7440112 | 0.00965 | ** | NA |
# Overlaid histograms of the unadjusted (`p`) and BH-adjusted (`p.adj`)
# p-values in `crispr_signif_allmut_del`, binned in steps of 0.025 on [0, 1].
#
# The fill aesthetic is keyed by what each layer shows ("Unadjusted" /
# "BH-adjusted") and the legend order is pinned with `breaks`, so the legend
# text no longer depends on the colour names happening to sort in the same
# order as a positional `labels` vector.
wilcox_gene_allmut_plot <- ggplot(data = crispr_signif_allmut_del) +
  geom_histogram(
    aes(x = p, fill = "Unadjusted"),
    breaks = seq(0, 1, by = 0.025),
    color = "black",
    alpha = 0.7
  ) +
  geom_histogram(
    aes(x = p.adj, fill = "BH-adjusted"),
    breaks = seq(0, 1, by = 0.025),
    color = "black",
    alpha = 0.7
  ) +
  scale_x_continuous(
    breaks = seq(0, 1, by = 0.05),
    labels = seq(0, 1, by = 0.05)
  ) +
  scale_fill_manual(
    name = "P-values",
    values = c("Unadjusted" = "chartreuse4", "BH-adjusted" = "darkslategray3"),
    breaks = c("Unadjusted", "BH-adjusted")
  ) +
  theme(
    axis.text.x = element_text(angle = 45, hjust = 1),
    legend.position = c(0.1, 0.85)
  ) +
  # The x-axis is shared by the unadjusted and the adjusted layer, so it is
  # labelled neutrally; the legend tells the two distributions apart.
  labs(x = "P-value", y = "Frequency")
wilcox_gene_allmut_plot
# Optional PDF export, left disabled (now points at the plot object built above):
# ggsave(filename = "./plots_18Q3/crispr/pvals_genes.pdf", plot = wilcox_gene_allmut_plot, width = 8, height = 4, device = "pdf")
# Per-gene Wilcoxon tests of Score against non-silent mutation status over
# `crispr_data`, with BH-adjusted p-values. `adj_signif()` is a helper defined
# elsewhere in this file (presumably it adds the `p.signif.adj` column shown
# below -- confirm). Rows are then sorted by ascending unadjusted p-value.
crispr_signif_allmut_nonsilent <- compare_means(
  Score ~ Mutation_Status_Nonsilent,
  data = crispr_data,
  group.by = c("Hugo_Symbol"),
  method = "wilcox.test",
  p.adjust.method = "BH"
) %>%
  adj_signif() %>%
  .[order(.$p), ]

# Cache the sorted results on disk, then reload them from that same file.
saveRDS(
  crispr_signif_allmut_nonsilent,
  file = "./data_munging/rds/crispr_signif_allmut_nonsilent_gene.rds"
)
# Optional CSV export, left disabled:
# write.table(crispr_signif_allmut_nonsilent, file = "~/Desktop/crispr_signif_allmut_nonsilent_gene.csv", quote = FALSE, sep = ",", row.names = FALSE)
crispr_signif_allmut_nonsilent <- readRDS(
  file = "./data_munging/rds/crispr_signif_allmut_nonsilent_gene.rds"
)

# Scrollable table of the genes with unadjusted p < 0.01.
crispr_signif_allmut_nonsilent %>%
  filter(p < 0.01) %>%
  .[, c("Hugo_Symbol", "p", "p.adj", "p.format", "p.signif", "p.signif.adj")] %>%
  knitr::kable(
    caption = "Wilcoxon test results comparing non-silent mutant vs other cell lines, p < 0.01 (BH-adjusted p-values: * p <= 0.05, ** p <= 0.01, *** p <= 0.001, **** p <= 0.0001)"
  ) %>%
  kable_styling(
    bootstrap_options = c("striped", "hover", "condensed", "responsive")
  ) %>%
  scroll_box(width = "900px", height = "450px")
| Hugo_Symbol | p | p.adj | p.format | p.signif | p.signif.adj |
|---|---|---|---|---|---|
| TP53 | 0.0000000 | 0.0000000 | < 2e-16 | **** | **** |
| KRAS | 0.0000000 | 0.0000000 | < 2e-16 | **** | **** |
| NRAS | 0.0000000 | 0.0000000 | < 2e-16 | **** | **** |
| BRAF | 0.0000000 | 0.0000000 | < 2e-16 | **** | **** |
| PTEN | 0.0000000 | 0.0000000 | < 2e-16 | **** | **** |
| PIK3CA | 0.0000000 | 0.0000000 | 1.0e-13 | **** | **** |
| PIK3R1 | 0.0000001 | 0.0003379 | 1.4e-07 | **** | *** |
| CTNNB1 | 0.0000023 | 0.0050434 | 2.3e-06 | **** | ** |
| TCERG1 | 0.0000055 | 0.0104819 | 5.5e-06 | **** | * |
| ARID1A | 0.0000065 | 0.0111737 | 6.5e-06 | **** | * |
| TPR | 0.0000510 | 0.0797535 | 5.1e-05 | **** | NA |
| VHL | 0.0000665 | 0.0952975 | 6.7e-05 | **** | NA |
| FCGBP | 0.0000895 | 0.1184104 | 9.0e-05 | **** | NA |
| ZNF177 | 0.0001226 | 0.1505314 | 0.00012 | *** | NA |
| C14orf39 | 0.0002001 | 0.2293087 | 0.00020 | *** | NA |
| SLC22A9 | 0.0002503 | 0.2677702 | 0.00025 | *** | NA |
| GSTM5 | 0.0002648 | 0.2677702 | 0.00026 | *** | NA |
| NIPBL | 0.0003016 | 0.2880836 | 0.00030 | *** | NA |
| PIGW | 0.0003638 | 0.3291939 | 0.00036 | *** | NA |
| TLX2 | 0.0004787 | 0.4114775 | 0.00048 | *** | NA |
| ARFGAP1 | 0.0005157 | 0.4159808 | 0.00052 | *** | NA |
| ZNF808 | 0.0005323 | 0.4159808 | 0.00053 | *** | NA |
| CD320 | 0.0005816 | 0.4347206 | 0.00058 | *** | NA |
| ING2 | 0.0006989 | 0.4568910 | 0.00070 | *** | NA |
| MAT2B | 0.0007184 | 0.4568910 | 0.00072 | *** | NA |
| SYNPO2L | 0.0007699 | 0.4568910 | 0.00077 | *** | NA |
| UTP20 | 0.0007718 | 0.4568910 | 0.00077 | *** | NA |
| HRAS | 0.0008408 | 0.4568910 | 0.00084 | *** | NA |
| TAOK2 | 0.0008437 | 0.4568910 | 0.00084 | *** | NA |
| CPSF1 | 0.0008505 | 0.4568910 | 0.00085 | *** | NA |
| UTP3 | 0.0008549 | 0.4568910 | 0.00085 | *** | NA |
| RFC1 | 0.0008787 | 0.4568910 | 0.00088 | *** | NA |
| RNF208 | 0.0008907 | 0.4568910 | 0.00089 | *** | NA |
| KHSRP | 0.0009036 | 0.4568910 | 0.00090 | *** | NA |
| KCNIP4 | 0.0009472 | 0.4652404 | 0.00095 | *** | NA |
| GOLGA3 | 0.0010340 | 0.4841935 | 0.00103 | ** | NA |
| PROM1 | 0.0010485 | 0.4841935 | 0.00105 | ** | NA |
| PMPCA | 0.0010990 | 0.4841935 | 0.00110 | ** | NA |
| MDN1 | 0.0011415 | 0.4841935 | 0.00114 | ** | NA |
| PCDHA8 | 0.0012404 | 0.4841935 | 0.00124 | ** | NA |
| OR13C4 | 0.0012702 | 0.4841935 | 0.00127 | ** | NA |
| FASTKD2 | 0.0012728 | 0.4841935 | 0.00127 | ** | NA |
| TELO2 | 0.0012922 | 0.4841935 | 0.00129 | ** | NA |
| TAS1R2 | 0.0013163 | 0.4841935 | 0.00132 | ** | NA |
| ZNF439 | 0.0013227 | 0.4841935 | 0.00132 | ** | NA |
| KRT19 | 0.0013501 | 0.4841935 | 0.00135 | ** | NA |
| AVEN | 0.0013562 | 0.4841935 | 0.00136 | ** | NA |
| MTMR2 | 0.0014414 | 0.4841935 | 0.00144 | ** | NA |
| E2F1 | 0.0014486 | 0.4841935 | 0.00145 | ** | NA |
| IQCH | 0.0014602 | 0.4841935 | 0.00146 | ** | NA |
| LPO | 0.0014636 | 0.4841935 | 0.00146 | ** | NA |
| FRMD4B | 0.0014758 | 0.4841935 | 0.00148 | ** | NA |
| PLA2G4F | 0.0014997 | 0.4841935 | 0.00150 | ** | NA |
| DSEL | 0.0015209 | 0.4841935 | 0.00152 | ** | NA |
| HSD3B7 | 0.0015616 | 0.4880853 | 0.00156 | ** | NA |
| PCDHB15 | 0.0016370 | 0.4937603 | 0.00164 | ** | NA |
| TAAR1 | 0.0016765 | 0.4937603 | 0.00168 | ** | NA |
| MYH13 | 0.0016962 | 0.4937603 | 0.00170 | ** | NA |
| ACSM2B | 0.0017107 | 0.4937603 | 0.00171 | ** | NA |
| OR8D1 | 0.0017233 | 0.4937603 | 0.00172 | ** | NA |
| KIAA1211L | 0.0019402 | 0.5370893 | 0.00194 | ** | NA |
| SENP8 | 0.0019559 | 0.5370893 | 0.00196 | ** | NA |
| RUVBL1 | 0.0019950 | 0.5370893 | 0.00200 | ** | NA |
| OR52E8 | 0.0020406 | 0.5370893 | 0.00204 | ** | NA |
| CNOT1 | 0.0020488 | 0.5370893 | 0.00205 | ** | NA |
| PI4K2A | 0.0020961 | 0.5370893 | 0.00210 | ** | NA |
| KPNA6 | 0.0021334 | 0.5370893 | 0.00213 | ** | NA |
| ST6GALNAC1 | 0.0022068 | 0.5370893 | 0.00221 | ** | NA |
| POU2F1 | 0.0022102 | 0.5370893 | 0.00221 | ** | NA |
| BTBD11 | 0.0022318 | 0.5370893 | 0.00223 | ** | NA |
| LPCAT4 | 0.0022875 | 0.5370893 | 0.00229 | ** | NA |
| LILRB2 | 0.0023004 | 0.5370893 | 0.00230 | ** | NA |
| ANKRD31 | 0.0023475 | 0.5370893 | 0.00235 | ** | NA |
| OR51T1 | 0.0023614 | 0.5370893 | 0.00236 | ** | NA |
| METTL17 | 0.0023923 | 0.5370893 | 0.00239 | ** | NA |
| POLG | 0.0024034 | 0.5370893 | 0.00240 | ** | NA |
| CIITA | 0.0024057 | 0.5370893 | 0.00241 | ** | NA |
| ADAM15 | 0.0024806 | 0.5467237 | 0.00248 | ** | NA |
| RAB43 | 0.0025433 | 0.5500972 | 0.00254 | ** | NA |
| EZH2 | 0.0025733 | 0.5500972 | 0.00257 | ** | NA |
| ZDHHC24 | 0.0025919 | 0.5500972 | 0.00259 | ** | NA |
| NFE2L2 | 0.0026889 | 0.5525281 | 0.00269 | ** | NA |
| SURF6 | 0.0027361 | 0.5525281 | 0.00274 | ** | NA |
| KIF11 | 0.0027512 | 0.5525281 | 0.00275 | ** | NA |
| SDHD | 0.0027619 | 0.5525281 | 0.00276 | ** | NA |
| GRM7 | 0.0027641 | 0.5525281 | 0.00276 | ** | NA |
| NXPH4 | 0.0028292 | 0.5590491 | 0.00283 | ** | NA |
| DENND4C | 0.0028765 | 0.5604565 | 0.00288 | ** | NA |
| LRGUK | 0.0029016 | 0.5604565 | 0.00290 | ** | NA |
| SAMM50 | 0.0030102 | 0.5719308 | 0.00301 | ** | NA |
| ACTA1 | 0.0030465 | 0.5719308 | 0.00305 | ** | NA |
| SPAG17 | 0.0030697 | 0.5719308 | 0.00307 | ** | NA |
| SLC26A2 | 0.0030940 | 0.5719308 | 0.00309 | ** | NA |
| MAP2K3 | 0.0032831 | 0.5896613 | 0.00328 | ** | NA |
| DIS3L2 | 0.0032847 | 0.5896613 | 0.00328 | ** | NA |
| PTPRE | 0.0032929 | 0.5896613 | 0.00329 | ** | NA |
| SCG2 | 0.0034839 | 0.6121167 | 0.00348 | ** | NA |
| TRIM8 | 0.0034895 | 0.6121167 | 0.00349 | ** | NA |
| CAMK2B | 0.0035496 | 0.6128833 | 0.00355 | ** | NA |
| SLC38A7 | 0.0036411 | 0.6128833 | 0.00364 | ** | NA |
| MMRN2 | 0.0036540 | 0.6128833 | 0.00365 | ** | NA |
| MAML2 | 0.0037079 | 0.6128833 | 0.00371 | ** | NA |
| SGCD | 0.0037645 | 0.6128833 | 0.00376 | ** | NA |
| FBXW7 | 0.0037715 | 0.6128833 | 0.00377 | ** | NA |
| ATXN2 | 0.0037776 | 0.6128833 | 0.00378 | ** | NA |
| VPS13D | 0.0037846 | 0.6128833 | 0.00378 | ** | NA |
| KRT83 | 0.0038147 | 0.6128833 | 0.00381 | ** | NA |
| SUSD5 | 0.0039261 | 0.6249422 | 0.00393 | ** | NA |
| ATR | 0.0040408 | 0.6288252 | 0.00404 | ** | NA |
| ATXN2L | 0.0041364 | 0.6288252 | 0.00414 | ** | NA |
| CD83 | 0.0041819 | 0.6288252 | 0.00418 | ** | NA |
| ZNF135 | 0.0042047 | 0.6288252 | 0.00420 | ** | NA |
| CREBBP | 0.0042381 | 0.6288252 | 0.00424 | ** | NA |
| TOP1MT | 0.0042412 | 0.6288252 | 0.00424 | ** | NA |
| EMILIN3 | 0.0042659 | 0.6288252 | 0.00427 | ** | NA |
| SPTBN4 | 0.0042799 | 0.6288252 | 0.00428 | ** | NA |
| SPHK2 | 0.0043100 | 0.6288252 | 0.00431 | ** | NA |
| GNAI2 | 0.0043171 | 0.6288252 | 0.00432 | ** | NA |
| PSMB4 | 0.0043587 | 0.6288252 | 0.00436 | ** | NA |
| TPP1 | 0.0046001 | 0.6288252 | 0.00460 | ** | NA |
| NFE2L1 | 0.0046446 | 0.6288252 | 0.00464 | ** | NA |
| PIGP | 0.0046922 | 0.6288252 | 0.00469 | ** | NA |
| PPP1R1C | 0.0047398 | 0.6288252 | 0.00474 | ** | NA |
| CDC37 | 0.0047810 | 0.6288252 | 0.00478 | ** | NA |
| TFAP2A | 0.0048201 | 0.6288252 | 0.00482 | ** | NA |
| CD200R1 | 0.0048862 | 0.6288252 | 0.00489 | ** | NA |
| CWH43 | 0.0048913 | 0.6288252 | 0.00489 | ** | NA |
| FOXRED1 | 0.0049144 | 0.6288252 | 0.00491 | ** | NA |
| HCRTR1 | 0.0049160 | 0.6288252 | 0.00492 | ** | NA |
| CACNA1I | 0.0050408 | 0.6288252 | 0.00504 | ** | NA |
| RBBP9 | 0.0050826 | 0.6288252 | 0.00508 | ** | NA |
| OGFOD3 | 0.0051338 | 0.6288252 | 0.00513 | ** | NA |
| AKR1C1 | 0.0051682 | 0.6288252 | 0.00517 | ** | NA |
| TBC1D22A | 0.0052429 | 0.6288252 | 0.00524 | ** | NA |
| APOBEC1 | 0.0052779 | 0.6288252 | 0.00528 | ** | NA |
| OR56A3 | 0.0052916 | 0.6288252 | 0.00529 | ** | NA |
| SLC46A2 | 0.0053108 | 0.6288252 | 0.00531 | ** | NA |
| STK17B | 0.0053176 | 0.6288252 | 0.00532 | ** | NA |
| C6orf15 | 0.0053410 | 0.6288252 | 0.00534 | ** | NA |
| SMPD1 | 0.0053513 | 0.6288252 | 0.00535 | ** | NA |
| FBXW12 | 0.0053691 | 0.6288252 | 0.00537 | ** | NA |
| MGA | 0.0053870 | 0.6288252 | 0.00539 | ** | NA |
| RSPH6A | 0.0053903 | 0.6288252 | 0.00539 | ** | NA |
| EEF2 | 0.0054341 | 0.6288252 | 0.00543 | ** | NA |
| CD5L | 0.0054356 | 0.6288252 | 0.00544 | ** | NA |
| AKR1B15 | 0.0054791 | 0.6288252 | 0.00548 | ** | NA |
| THADA | 0.0055256 | 0.6288252 | 0.00553 | ** | NA |
| MYOC | 0.0055382 | 0.6288252 | 0.00554 | ** | NA |
| SLC12A4 | 0.0056227 | 0.6288252 | 0.00562 | ** | NA |
| DNAJC5B | 0.0057016 | 0.6288252 | 0.00570 | ** | NA |
| TAS2R60 | 0.0057034 | 0.6288252 | 0.00570 | ** | NA |
| IBA57 | 0.0057420 | 0.6288252 | 0.00574 | ** | NA |
| HDGFRP2 | 0.0057596 | 0.6288252 | 0.00576 | ** | NA |
| MRPS34 | 0.0057928 | 0.6288252 | 0.00579 | ** | NA |
| AP5B1 | 0.0057997 | 0.6288252 | 0.00580 | ** | NA |
| KIF3B | 0.0058153 | 0.6288252 | 0.00582 | ** | NA |
| SSH2 | 0.0058405 | 0.6288252 | 0.00584 | ** | NA |
| MBD3L2 | 0.0058844 | 0.6288252 | 0.00588 | ** | NA |
| COPS2 | 0.0059315 | 0.6288252 | 0.00593 | ** | NA |
| WDR75 | 0.0059969 | 0.6288252 | 0.00600 | ** | NA |
| FRYL | 0.0060235 | 0.6288252 | 0.00602 | ** | NA |
| ZNF438 | 0.0060372 | 0.6288252 | 0.00604 | ** | NA |
| ELK3 | 0.0061135 | 0.6288252 | 0.00611 | ** | NA |
| MUS81 | 0.0061273 | 0.6288252 | 0.00613 | ** | NA |
| PRRC1 | 0.0061280 | 0.6288252 | 0.00613 | ** | NA |
| CLEC9A | 0.0062087 | 0.6288252 | 0.00621 | ** | NA |
| LMX1A | 0.0062097 | 0.6288252 | 0.00621 | ** | NA |
| SFXN3 | 0.0062354 | 0.6288252 | 0.00624 | ** | NA |
| FAM181B | 0.0062849 | 0.6288252 | 0.00628 | ** | NA |
| SMARCB1 | 0.0062993 | 0.6288252 | 0.00630 | ** | NA |
| ACBD6 | 0.0063036 | 0.6288252 | 0.00630 | ** | NA |
| SUPT7L | 0.0063204 | 0.6288252 | 0.00632 | ** | NA |
| CYP19A1 | 0.0063281 | 0.6288252 | 0.00633 | ** | NA |
| FBXL20 | 0.0064106 | 0.6333572 | 0.00641 | ** | NA |
| OR4N2 | 0.0065455 | 0.6387591 | 0.00655 | ** | NA |
| MIIP | 0.0066145 | 0.6387591 | 0.00661 | ** | NA |
| FCN3 | 0.0066248 | 0.6387591 | 0.00662 | ** | NA |
| ZNF107 | 0.0066752 | 0.6387591 | 0.00668 | ** | NA |
| MLST8 | 0.0067360 | 0.6387591 | 0.00674 | ** | NA |
| TSPAN13 | 0.0067540 | 0.6387591 | 0.00675 | ** | NA |
| DPP7 | 0.0067839 | 0.6387591 | 0.00678 | ** | NA |
| KDM5B | 0.0068837 | 0.6387591 | 0.00688 | ** | NA |
| IFFO2 | 0.0068857 | 0.6387591 | 0.00689 | ** | NA |
| COG3 | 0.0069083 | 0.6387591 | 0.00691 | ** | NA |
| DHX35 | 0.0069214 | 0.6387591 | 0.00692 | ** | NA |
| PPM1A | 0.0069353 | 0.6387591 | 0.00694 | ** | NA |
| MSGN1 | 0.0069483 | 0.6387591 | 0.00695 | ** | NA |
| ANKRD32 | 0.0071033 | 0.6427900 | 0.00710 | ** | NA |
| TLN1 | 0.0071319 | 0.6427900 | 0.00713 | ** | NA |
| POC1A | 0.0071368 | 0.6427900 | 0.00714 | ** | NA |
| CLDN5 | 0.0071709 | 0.6427900 | 0.00717 | ** | NA |
| PRKCZ | 0.0072554 | 0.6427900 | 0.00726 | ** | NA |
| ACTL8 | 0.0072969 | 0.6427900 | 0.00730 | ** | NA |
| KRTAP10-6 | 0.0072992 | 0.6427900 | 0.00730 | ** | NA |
| SMCR8 | 0.0073359 | 0.6427900 | 0.00734 | ** | NA |
| OLFML2B | 0.0073612 | 0.6427900 | 0.00736 | ** | NA |
| HSPB2 | 0.0073660 | 0.6427900 | 0.00737 | ** | NA |
| INTS12 | 0.0075473 | 0.6534174 | 0.00755 | ** | NA |
| NAA15 | 0.0075638 | 0.6534174 | 0.00756 | ** | NA |
| ARSB | 0.0076069 | 0.6538525 | 0.00761 | ** | NA |
| VPS37B | 0.0076704 | 0.6545014 | 0.00767 | ** | NA |
| CCDC159 | 0.0077369 | 0.6545014 | 0.00774 | ** | NA |
| SSH1 | 0.0077665 | 0.6545014 | 0.00777 | ** | NA |
| NCR3LG1 | 0.0078580 | 0.6545014 | 0.00786 | ** | NA |
| ACYP2 | 0.0079841 | 0.6545014 | 0.00798 | ** | NA |
| XPO5 | 0.0080510 | 0.6545014 | 0.00805 | ** | NA |
| GTF3C1 | 0.0080700 | 0.6545014 | 0.00807 | ** | NA |
| OR4S2 | 0.0081035 | 0.6545014 | 0.00810 | ** | NA |
| BTAF1 | 0.0081559 | 0.6545014 | 0.00816 | ** | NA |
| TACSTD2 | 0.0081791 | 0.6545014 | 0.00818 | ** | NA |
| MED30 | 0.0082419 | 0.6545014 | 0.00824 | ** | NA |
| ADAM32 | 0.0082522 | 0.6545014 | 0.00825 | ** | NA |
| LY9 | 0.0083045 | 0.6545014 | 0.00830 | ** | NA |
| H2AFY2 | 0.0083475 | 0.6545014 | 0.00835 | ** | NA |
| MRPL13 | 0.0083762 | 0.6545014 | 0.00838 | ** | NA |
| PAQR3 | 0.0084147 | 0.6545014 | 0.00841 | ** | NA |
| GHITM | 0.0084474 | 0.6545014 | 0.00845 | ** | NA |
| SGK2 | 0.0084586 | 0.6545014 | 0.00846 | ** | NA |
| ADO | 0.0085378 | 0.6545014 | 0.00854 | ** | NA |
| ZNF17 | 0.0085562 | 0.6545014 | 0.00856 | ** | NA |
| MFAP4 | 0.0085654 | 0.6545014 | 0.00857 | ** | NA |
| CD163 | 0.0085663 | 0.6545014 | 0.00857 | ** | NA |
| COL7A1 | 0.0085733 | 0.6545014 | 0.00857 | ** | NA |
| CD80 | 0.0085983 | 0.6545014 | 0.00860 | ** | NA |
| ARMC7 | 0.0085988 | 0.6545014 | 0.00860 | ** | NA |
| COL8A1 | 0.0086043 | 0.6545014 | 0.00860 | ** | NA |
| KHDC1L | 0.0086562 | 0.6546529 | 0.00866 | ** | NA |
| CHD2 | 0.0086825 | 0.6546529 | 0.00868 | ** | NA |
| PLCH2 | 0.0088368 | 0.6560317 | 0.00884 | ** | NA |
| OR13C3 | 0.0088398 | 0.6560317 | 0.00884 | ** | NA |
| C1orf141 | 0.0089498 | 0.6560317 | 0.00895 | ** | NA |
| ARHGEF17 | 0.0089894 | 0.6560317 | 0.00899 | ** | NA |
| ARID1B | 0.0090009 | 0.6560317 | 0.00900 | ** | NA |
| COL9A2 | 0.0090078 | 0.6560317 | 0.00901 | ** | NA |
| ZC3H4 | 0.0091088 | 0.6560317 | 0.00911 | ** | NA |
| SUPT6H | 0.0091374 | 0.6560317 | 0.00914 | ** | NA |
| CCNL1 | 0.0091597 | 0.6560317 | 0.00916 | ** | NA |
| TMEM258 | 0.0091769 | 0.6560317 | 0.00918 | ** | NA |
| USP32 | 0.0092096 | 0.6560317 | 0.00921 | ** | NA |
| DNAJC8 | 0.0093058 | 0.6560317 | 0.00931 | ** | NA |
| C20orf26 | 0.0093466 | 0.6560317 | 0.00935 | ** | NA |
| KRTAP4-4 | 0.0093494 | 0.6560317 | 0.00935 | ** | NA |
| ACHE | 0.0093636 | 0.6560317 | 0.00936 | ** | NA |
| LCTL | 0.0093837 | 0.6560317 | 0.00938 | ** | NA |
| ELMSAN1 | 0.0093954 | 0.6560317 | 0.00940 | ** | NA |
| SLC25A4 | 0.0094344 | 0.6560317 | 0.00943 | ** | NA |
| F11R | 0.0094708 | 0.6560317 | 0.00947 | ** | NA |
| LY6G6F | 0.0095144 | 0.6560317 | 0.00951 | ** | NA |
| ZNF330 | 0.0095171 | 0.6560317 | 0.00952 | ** | NA |
| NECAB1 | 0.0095689 | 0.6560317 | 0.00957 | ** | NA |
| GMCL1 | 0.0096195 | 0.6560317 | 0.00962 | ** | NA |
| C16orf58 | 0.0096729 | 0.6560317 | 0.00967 | ** | NA |
| ZBTB44 | 0.0096904 | 0.6560317 | 0.00969 | ** | NA |
| INHBA | 0.0097282 | 0.6560317 | 0.00973 | ** | NA |
| CLEC4C | 0.0097545 | 0.6560317 | 0.00975 | ** | NA |
| GLG1 | 0.0097693 | 0.6560317 | 0.00977 | ** | NA |
| RNF31 | 0.0098119 | 0.6563315 | 0.00981 | ** | NA |
| COPS3 | 0.0098996 | 0.6577057 | 0.00990 | ** | NA |
| NOL6 | 0.0099717 | 0.6577057 | 0.00997 | ** | NA |
| WDR77 | 0.0099778 | 0.6577057 | 0.00998 | ** | NA |
# Overlaid histograms of the unadjusted (`p`) and BH-adjusted (`p.adj`)
# p-values held in `crispr_signif_allmut_nonsilent` (built earlier in the
# file; presumably the per-gene non-silent-mutation Wilcoxon results).
# Each layer maps `fill` to a constant string so that it gets its own legend
# key; scale_fill_manual() then turns those keys into colours and labels.
wilcox_gene_allmut_nonsilent_plot <- ggplot(data = crispr_signif_allmut_nonsilent) +
  geom_histogram(aes(x = p, fill = "chartreuse4"), breaks = seq(0, 1, by = 0.025), color = "black", alpha = 0.7) +
  geom_histogram(aes(x = p.adj, fill = "darkslategray3"), breaks = seq(0, 1, by = 0.025), color = "black", alpha = 0.7) +
  scale_x_continuous(breaks = seq(0, 1, by = 0.05), labels = seq(0, 1, by = 0.05)) +
  scale_fill_manual(
    name = "P-values",
    values = c("chartreuse4" = "chartreuse4", "darkslategray3" = "darkslategray3"),
    # Pin the key order so the labels cannot silently drift away from the
    # colours if a fill key is ever renamed.
    breaks = c("chartreuse4", "darkslategray3"),
    labels = c("Unadjusted", "BH-adjusted")
  ) +
  theme(axis.text.x = element_text(angle = 45, hjust = 1), legend.position = c(0.1, 0.85)) +
  # The x-axis carries both the unadjusted and the adjusted values, so it is
  # labelled generically; the legend distinguishes the two series.
  labs(x = "P-value", y = "Frequency")
wilcox_gene_allmut_nonsilent_plot
# Wilcoxon test of Score against Mutation_Status_DeleteriousMissense, run
# separately for every Hugo_Symbol, with BH-adjusted p-values. The result is
# passed through adj_signif() (defined elsewhere in this file; presumably it
# adds the `p.signif.adj` column shown in the table below -- confirm) and then
# sorted by ascending unadjusted p-value.
crispr_signif_allmut_delmis <- compare_means(
  formula = Score ~ Mutation_Status_DeleteriousMissense,
  data = crispr_data,
  method = "wilcox.test",
  p.adjust.method = "BH",
  group.by = c("Hugo_Symbol")
) %>%
  adj_signif() %>%
  .[order(.[["p"]]), ]

# Cache the sorted results on disk, then read them back from the cache.
saveRDS(
  crispr_signif_allmut_delmis,
  "./data_munging/rds/crispr_signif_allmut_deleteriousmissense_gene.rds"
)
# write.table(crispr_signif_allmut_delmis, file = "~/Desktop/crispr_signif_allmut_deleteriousmissense_gene.csv", quote = FALSE, sep = ",", row.names = FALSE)
crispr_signif_allmut_delmis <- readRDS(
  "./data_munging/rds/crispr_signif_allmut_deleteriousmissense_gene.rds"
)

# Render the rows with unadjusted p < 0.01 as a styled, scrollable table.
crispr_signif_allmut_delmis %>%
  filter(p < 0.01) %>%
  .[, c("Hugo_Symbol", "p", "p.adj", "p.format", "p.signif", "p.signif.adj")] %>%
  knitr::kable(caption = "Wilcoxon test results comparing deleterious and missense mutant vs other cell lines, p < 0.01 (BH-adjusted p-values: * p <= 0.05, ** p <= 0.01, *** p <= 0.001, **** p <= 0.0001)") %>%
  kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive")) %>%
  scroll_box(width = "900px", height = "450px")
| Hugo_Symbol | p | p.adj | p.format | p.signif | p.signif.adj |
|---|---|---|---|---|---|
| KRAS | 0.0000000 | 0.0000000 | < 2e-16 | **** | **** |
| TP53 | 0.0000000 | 0.0000000 | < 2e-16 | **** | **** |
| BRAF | 0.0000000 | 0.0000000 | < 2e-16 | **** | **** |
| NRAS | 0.0000000 | 0.0000000 | < 2e-16 | **** | **** |
| PTEN | 0.0000000 | 0.0000000 | 5.1e-16 | **** | **** |
| PIK3CA | 0.0000000 | 0.0000000 | 9.7e-14 | **** | **** |
| CTNNB1 | 0.0000023 | 0.0057622 | 2.3e-06 | **** | ** |
| TCERG1 | 0.0000043 | 0.0092556 | 4.3e-06 | **** | ** |
| ARID1A | 0.0000077 | 0.0146617 | 7.7e-06 | **** | * |
| TPR | 0.0000436 | 0.0749151 | 4.4e-05 | **** | NA |
| FCGBP | 0.0000695 | 0.1085679 | 6.9e-05 | **** | NA |
| VHL | 0.0001029 | 0.1473663 | 0.00010 | *** | NA |
| ZNF177 | 0.0001226 | 0.1620635 | 0.00012 | *** | NA |
| C14orf39 | 0.0002001 | 0.2456164 | 0.00020 | *** | NA |
| SLC22A9 | 0.0002503 | 0.2844231 | 0.00025 | *** | NA |
| GSTM5 | 0.0002648 | 0.2844231 | 0.00026 | *** | NA |
| PIGW | 0.0003638 | 0.3678156 | 0.00036 | *** | NA |
| TLX2 | 0.0004787 | 0.4570643 | 0.00048 | *** | NA |
| ARFGAP1 | 0.0005157 | 0.4574458 | 0.00052 | *** | NA |
| ZNF808 | 0.0005323 | 0.4574458 | 0.00053 | *** | NA |
| CD320 | 0.0005816 | 0.4759840 | 0.00058 | *** | NA |
| ING2 | 0.0006989 | 0.4783606 | 0.00070 | *** | NA |
| NIPBL | 0.0007160 | 0.4783606 | 0.00072 | *** | NA |
| MAT2B | 0.0007184 | 0.4783606 | 0.00072 | *** | NA |
| SYNPO2L | 0.0007699 | 0.4783606 | 0.00077 | *** | NA |
| UTP20 | 0.0007718 | 0.4783606 | 0.00077 | *** | NA |
| HRAS | 0.0008408 | 0.4783606 | 0.00084 | *** | NA |
| CPSF1 | 0.0008505 | 0.4783606 | 0.00085 | *** | NA |
| UTP3 | 0.0008549 | 0.4783606 | 0.00085 | *** | NA |
| FBXW7 | 0.0008732 | 0.4783606 | 0.00087 | *** | NA |
| RFC1 | 0.0008787 | 0.4783606 | 0.00088 | *** | NA |
| RNF208 | 0.0008907 | 0.4783606 | 0.00089 | *** | NA |
| KCNIP4 | 0.0009472 | 0.4932932 | 0.00095 | *** | NA |
| GOLGA3 | 0.0010340 | 0.5125264 | 0.00103 | ** | NA |
| PROM1 | 0.0010485 | 0.5125264 | 0.00105 | ** | NA |
| PMPCA | 0.0010990 | 0.5125264 | 0.00110 | ** | NA |
| PIK3R1 | 0.0011525 | 0.5125264 | 0.00115 | ** | NA |
| PCDHA8 | 0.0012404 | 0.5125264 | 0.00124 | ** | NA |
| OR13C4 | 0.0012702 | 0.5125264 | 0.00127 | ** | NA |
| MDN1 | 0.0012746 | 0.5125264 | 0.00127 | ** | NA |
| TELO2 | 0.0012922 | 0.5125264 | 0.00129 | ** | NA |
| TAS1R2 | 0.0013163 | 0.5125264 | 0.00132 | ** | NA |
| ZNF439 | 0.0013227 | 0.5125264 | 0.00132 | ** | NA |
| KRT19 | 0.0013501 | 0.5125264 | 0.00135 | ** | NA |
| MTMR2 | 0.0014414 | 0.5125264 | 0.00144 | ** | NA |
| E2F1 | 0.0014486 | 0.5125264 | 0.00145 | ** | NA |
| IQCH | 0.0014602 | 0.5125264 | 0.00146 | ** | NA |
| LPO | 0.0014636 | 0.5125264 | 0.00146 | ** | NA |
| FRMD4B | 0.0014758 | 0.5125264 | 0.00148 | ** | NA |
| PLA2G4F | 0.0014997 | 0.5125264 | 0.00150 | ** | NA |
| DSEL | 0.0015209 | 0.5125264 | 0.00152 | ** | NA |
| HSD3B7 | 0.0015616 | 0.5160939 | 0.00156 | ** | NA |
| PCDHB15 | 0.0016370 | 0.5195965 | 0.00164 | ** | NA |
| TAAR1 | 0.0016765 | 0.5195965 | 0.00168 | ** | NA |
| MYH13 | 0.0016962 | 0.5195965 | 0.00170 | ** | NA |
| ACSM2B | 0.0017107 | 0.5195965 | 0.00171 | ** | NA |
| OR8D1 | 0.0017233 | 0.5195965 | 0.00172 | ** | NA |
| KIAA1211L | 0.0019402 | 0.5430002 | 0.00194 | ** | NA |
| SENP8 | 0.0019559 | 0.5430002 | 0.00196 | ** | NA |
| AVEN | 0.0019832 | 0.5430002 | 0.00198 | ** | NA |
| TAOK2 | 0.0019841 | 0.5430002 | 0.00198 | ** | NA |
| RUVBL1 | 0.0019950 | 0.5430002 | 0.00200 | ** | NA |
| CLEC4C | 0.0020406 | 0.5430002 | 0.00204 | ** | NA |
| OR52E8 | 0.0020406 | 0.5430002 | 0.00204 | ** | NA |
| PI4K2A | 0.0020961 | 0.5430002 | 0.00210 | ** | NA |
| KPNA6 | 0.0021334 | 0.5430002 | 0.00213 | ** | NA |
| ST6GALNAC1 | 0.0022068 | 0.5430002 | 0.00221 | ** | NA |
| POU2F1 | 0.0022102 | 0.5430002 | 0.00221 | ** | NA |
| EZH2 | 0.0022226 | 0.5430002 | 0.00222 | ** | NA |
| BTBD11 | 0.0022318 | 0.5430002 | 0.00223 | ** | NA |
| LPCAT4 | 0.0022875 | 0.5430002 | 0.00229 | ** | NA |
| ANKRD31 | 0.0023475 | 0.5430002 | 0.00235 | ** | NA |
| OR51T1 | 0.0023614 | 0.5430002 | 0.00236 | ** | NA |
| METTL17 | 0.0023923 | 0.5430002 | 0.00239 | ** | NA |
| KHSRP | 0.0023925 | 0.5430002 | 0.00239 | ** | NA |
| CIITA | 0.0024057 | 0.5430002 | 0.00241 | ** | NA |
| TUBB2A | 0.0024329 | 0.5430002 | 0.00243 | ** | NA |
| ADAM15 | 0.0024806 | 0.5465647 | 0.00248 | ** | NA |
| ZDHHC24 | 0.0025919 | 0.5638596 | 0.00259 | ** | NA |
| SURF6 | 0.0027361 | 0.5723325 | 0.00274 | ** | NA |
| KIF11 | 0.0027512 | 0.5723325 | 0.00275 | ** | NA |
| SDHD | 0.0027619 | 0.5723325 | 0.00276 | ** | NA |
| GRM7 | 0.0027641 | 0.5723325 | 0.00276 | ** | NA |
| NXPH4 | 0.0028292 | 0.5788467 | 0.00283 | ** | NA |
| SAMM50 | 0.0030102 | 0.6042511 | 0.00301 | ** | NA |
| ACTA1 | 0.0030465 | 0.6042511 | 0.00305 | ** | NA |
| SPAG17 | 0.0030697 | 0.6042511 | 0.00307 | ** | NA |
| SLC26A2 | 0.0030940 | 0.6042511 | 0.00309 | ** | NA |
| DENND4C | 0.0031465 | 0.6075857 | 0.00315 | ** | NA |
| ATXN2 | 0.0032463 | 0.6085056 | 0.00325 | ** | NA |
| MAP2K3 | 0.0032831 | 0.6085056 | 0.00328 | ** | NA |
| DIS3L2 | 0.0032847 | 0.6085056 | 0.00328 | ** | NA |
| PTPRE | 0.0032929 | 0.6085056 | 0.00329 | ** | NA |
| CNOT1 | 0.0034549 | 0.6246874 | 0.00345 | ** | NA |
| SCG2 | 0.0034839 | 0.6246874 | 0.00348 | ** | NA |
| TRIM8 | 0.0034895 | 0.6246874 | 0.00349 | ** | NA |
| CAMK2B | 0.0035496 | 0.6288942 | 0.00355 | ** | NA |
| SLC38A7 | 0.0036411 | 0.6335632 | 0.00364 | ** | NA |
| MMRN2 | 0.0036540 | 0.6335632 | 0.00365 | ** | NA |
| SGCD | 0.0037645 | 0.6335632 | 0.00376 | ** | NA |
| VPS13D | 0.0037846 | 0.6335632 | 0.00378 | ** | NA |
| KRT83 | 0.0038147 | 0.6335632 | 0.00381 | ** | NA |
| EEF2 | 0.0038279 | 0.6335632 | 0.00383 | ** | NA |
| LILRB2 | 0.0038340 | 0.6335632 | 0.00383 | ** | NA |
| SUSD5 | 0.0039261 | 0.6376846 | 0.00393 | ** | NA |
| NOL7 | 0.0039331 | 0.6376846 | 0.00393 | ** | NA |
| ATR | 0.0040408 | 0.6473519 | 0.00404 | ** | NA |
| ATXN2L | 0.0041364 | 0.6473519 | 0.00414 | ** | NA |
| CD83 | 0.0041819 | 0.6473519 | 0.00418 | ** | NA |
| ZNF135 | 0.0042047 | 0.6473519 | 0.00420 | ** | NA |
| TOP1MT | 0.0042412 | 0.6473519 | 0.00424 | ** | NA |
| EMILIN3 | 0.0042659 | 0.6473519 | 0.00427 | ** | NA |
| SPTBN4 | 0.0042799 | 0.6473519 | 0.00428 | ** | NA |
| SPHK2 | 0.0043100 | 0.6473519 | 0.00431 | ** | NA |
| PSMB4 | 0.0043587 | 0.6473519 | 0.00436 | ** | NA |
| IQGAP3 | 0.0045407 | 0.6473519 | 0.00454 | ** | NA |
| TPP1 | 0.0046001 | 0.6473519 | 0.00460 | ** | NA |
| NFE2L1 | 0.0046446 | 0.6473519 | 0.00464 | ** | NA |
| PIGP | 0.0046922 | 0.6473519 | 0.00469 | ** | NA |
| PPP1R1C | 0.0047398 | 0.6473519 | 0.00474 | ** | NA |
| CDC37 | 0.0047810 | 0.6473519 | 0.00478 | ** | NA |
| TFAP2A | 0.0048201 | 0.6473519 | 0.00482 | ** | NA |
| CD200R1 | 0.0048862 | 0.6473519 | 0.00489 | ** | NA |
| CWH43 | 0.0048913 | 0.6473519 | 0.00489 | ** | NA |
| FOXRED1 | 0.0049144 | 0.6473519 | 0.00491 | ** | NA |
| HCRTR1 | 0.0049160 | 0.6473519 | 0.00492 | ** | NA |
| RBBP9 | 0.0050826 | 0.6473519 | 0.00508 | ** | NA |
| OGFOD3 | 0.0051338 | 0.6473519 | 0.00513 | ** | NA |
| AKR1C1 | 0.0051682 | 0.6473519 | 0.00517 | ** | NA |
| TBC1D22A | 0.0052429 | 0.6473519 | 0.00524 | ** | NA |
| APOBEC1 | 0.0052779 | 0.6473519 | 0.00528 | ** | NA |
| OR56A3 | 0.0052916 | 0.6473519 | 0.00529 | ** | NA |
| SLC46A2 | 0.0053108 | 0.6473519 | 0.00531 | ** | NA |
| C6orf15 | 0.0053410 | 0.6473519 | 0.00534 | ** | NA |
| SMPD1 | 0.0053513 | 0.6473519 | 0.00535 | ** | NA |
| FBXW12 | 0.0053691 | 0.6473519 | 0.00537 | ** | NA |
| MGA | 0.0053870 | 0.6473519 | 0.00539 | ** | NA |
| RSPH6A | 0.0053903 | 0.6473519 | 0.00539 | ** | NA |
| THADA | 0.0055256 | 0.6473519 | 0.00553 | ** | NA |
| MYOC | 0.0055382 | 0.6473519 | 0.00554 | ** | NA |
| LRGUK | 0.0056014 | 0.6473519 | 0.00560 | ** | NA |
| SLC12A4 | 0.0056227 | 0.6473519 | 0.00562 | ** | NA |
| DNAJC5B | 0.0057016 | 0.6473519 | 0.00570 | ** | NA |
| TAS2R60 | 0.0057034 | 0.6473519 | 0.00570 | ** | NA |
| IBA57 | 0.0057420 | 0.6473519 | 0.00574 | ** | NA |
| HDGFRP2 | 0.0057596 | 0.6473519 | 0.00576 | ** | NA |
| MRPS34 | 0.0057928 | 0.6473519 | 0.00579 | ** | NA |
| KIF3B | 0.0058153 | 0.6473519 | 0.00582 | ** | NA |
| SSH2 | 0.0058405 | 0.6473519 | 0.00584 | ** | NA |
| MBD3L2 | 0.0058844 | 0.6473519 | 0.00588 | ** | NA |
| COPS2 | 0.0059315 | 0.6473519 | 0.00593 | ** | NA |
| WDR75 | 0.0059969 | 0.6473519 | 0.00600 | ** | NA |
| FRYL | 0.0060235 | 0.6473519 | 0.00602 | ** | NA |
| ZNF438 | 0.0060372 | 0.6473519 | 0.00604 | ** | NA |
| MMS22L | 0.0060571 | 0.6473519 | 0.00606 | ** | NA |
| ELK3 | 0.0061135 | 0.6473519 | 0.00611 | ** | NA |
| MUS81 | 0.0061273 | 0.6473519 | 0.00613 | ** | NA |
| PRRC1 | 0.0061280 | 0.6473519 | 0.00613 | ** | NA |
| CD5L | 0.0061987 | 0.6473519 | 0.00620 | ** | NA |
| CLEC9A | 0.0062087 | 0.6473519 | 0.00621 | ** | NA |
| LMX1A | 0.0062097 | 0.6473519 | 0.00621 | ** | NA |
| SFXN3 | 0.0062354 | 0.6473519 | 0.00624 | ** | NA |
| RAB43 | 0.0062666 | 0.6473519 | 0.00627 | ** | NA |
| FAM181B | 0.0062849 | 0.6473519 | 0.00628 | ** | NA |
| SMARCB1 | 0.0062993 | 0.6473519 | 0.00630 | ** | NA |
| ACBD6 | 0.0063036 | 0.6473519 | 0.00630 | ** | NA |
| SUPT7L | 0.0063204 | 0.6473519 | 0.00632 | ** | NA |
| CYP19A1 | 0.0063281 | 0.6473519 | 0.00633 | ** | NA |
| FBXL20 | 0.0064106 | 0.6511943 | 0.00641 | ** | NA |
| CEP76 | 0.0064716 | 0.6511943 | 0.00647 | ** | NA |
| DPP7 | 0.0064794 | 0.6511943 | 0.00648 | ** | NA |
| OR4N2 | 0.0065455 | 0.6540139 | 0.00655 | ** | NA |
| MIIP | 0.0066145 | 0.6543288 | 0.00661 | ** | NA |
| FCN3 | 0.0066248 | 0.6543288 | 0.00662 | ** | NA |
| ZNF107 | 0.0066752 | 0.6555409 | 0.00668 | ** | NA |
| TSPAN13 | 0.0067540 | 0.6595153 | 0.00675 | ** | NA |
| KDM5B | 0.0068837 | 0.6597415 | 0.00688 | ** | NA |
| IFFO2 | 0.0068857 | 0.6597415 | 0.00689 | ** | NA |
| COG3 | 0.0069083 | 0.6597415 | 0.00691 | ** | NA |
| DHX35 | 0.0069214 | 0.6597415 | 0.00692 | ** | NA |
| MSGN1 | 0.0069483 | 0.6597415 | 0.00695 | ** | NA |
| ANKRD32 | 0.0071033 | 0.6599783 | 0.00710 | ** | NA |
| RB1 | 0.0071617 | 0.6599783 | 0.00716 | ** | NA |
| CLDN5 | 0.0071709 | 0.6599783 | 0.00717 | ** | NA |
| PRKCZ | 0.0072554 | 0.6599783 | 0.00726 | ** | NA |
| ARID1B | 0.0072590 | 0.6599783 | 0.00726 | ** | NA |
| ACTL8 | 0.0072969 | 0.6599783 | 0.00730 | ** | NA |
| KRTAP10-6 | 0.0072992 | 0.6599783 | 0.00730 | ** | NA |
| EML1 | 0.0073078 | 0.6599783 | 0.00731 | ** | NA |
| SMCR8 | 0.0073359 | 0.6599783 | 0.00734 | ** | NA |
| OLFML2B | 0.0073612 | 0.6599783 | 0.00736 | ** | NA |
| CREBBP | 0.0073732 | 0.6599783 | 0.00737 | ** | NA |
| F11R | 0.0075268 | 0.6631134 | 0.00753 | ** | NA |
| INTS12 | 0.0075473 | 0.6631134 | 0.00755 | ** | NA |
| NAA15 | 0.0075638 | 0.6631134 | 0.00756 | ** | NA |
| ARSB | 0.0076069 | 0.6631134 | 0.00761 | ** | NA |
| VPS37B | 0.0076704 | 0.6631134 | 0.00767 | ** | NA |
| CCDC159 | 0.0077369 | 0.6631134 | 0.00774 | ** | NA |
| SSH1 | 0.0077665 | 0.6631134 | 0.00777 | ** | NA |
| NCR3LG1 | 0.0078580 | 0.6631134 | 0.00786 | ** | NA |
| PLOD1 | 0.0078951 | 0.6631134 | 0.00790 | ** | NA |
| ACYP2 | 0.0079841 | 0.6631134 | 0.00798 | ** | NA |
| XPO5 | 0.0080510 | 0.6631134 | 0.00805 | ** | NA |
| AP5B1 | 0.0080810 | 0.6631134 | 0.00808 | ** | NA |
| OR4S2 | 0.0081035 | 0.6631134 | 0.00810 | ** | NA |
| BTAF1 | 0.0081559 | 0.6631134 | 0.00816 | ** | NA |
| TACSTD2 | 0.0081791 | 0.6631134 | 0.00818 | ** | NA |
| MED30 | 0.0082419 | 0.6631134 | 0.00824 | ** | NA |
| ADAM32 | 0.0082522 | 0.6631134 | 0.00825 | ** | NA |
| LY9 | 0.0083045 | 0.6631134 | 0.00830 | ** | NA |
| H2AFY2 | 0.0083475 | 0.6631134 | 0.00835 | ** | NA |
| MRPL13 | 0.0083762 | 0.6631134 | 0.00838 | ** | NA |
| PAQR3 | 0.0084147 | 0.6631134 | 0.00841 | ** | NA |
| YLPM1 | 0.0084185 | 0.6631134 | 0.00842 | ** | NA |
| GHITM | 0.0084474 | 0.6631134 | 0.00845 | ** | NA |
| SGK2 | 0.0084586 | 0.6631134 | 0.00846 | ** | NA |
| ADO | 0.0085378 | 0.6631134 | 0.00854 | ** | NA |
| ZNF17 | 0.0085562 | 0.6631134 | 0.00856 | ** | NA |
| MFAP4 | 0.0085654 | 0.6631134 | 0.00857 | ** | NA |
| CD163 | 0.0085663 | 0.6631134 | 0.00857 | ** | NA |
| COL7A1 | 0.0085733 | 0.6631134 | 0.00857 | ** | NA |
| ARMC7 | 0.0085988 | 0.6631134 | 0.00860 | ** | NA |
| COL8A1 | 0.0086043 | 0.6631134 | 0.00860 | ** | NA |
| KHDC1L | 0.0086562 | 0.6641352 | 0.00866 | ** | NA |
| PLCH2 | 0.0088368 | 0.6679256 | 0.00884 | ** | NA |
| OR13C3 | 0.0088398 | 0.6679256 | 0.00884 | ** | NA |
| C1orf141 | 0.0089498 | 0.6679256 | 0.00895 | ** | NA |
| ARHGEF17 | 0.0089894 | 0.6679256 | 0.00899 | ** | NA |
| COL9A2 | 0.0090078 | 0.6679256 | 0.00901 | ** | NA |
| H3F3A | 0.0090755 | 0.6679256 | 0.00908 | ** | NA |
| ZC3H4 | 0.0091088 | 0.6679256 | 0.00911 | ** | NA |
| SUPT6H | 0.0091374 | 0.6679256 | 0.00914 | ** | NA |
| CCNL1 | 0.0091597 | 0.6679256 | 0.00916 | ** | NA |
| MAML2 | 0.0091672 | 0.6679256 | 0.00917 | ** | NA |
| TMEM258 | 0.0091769 | 0.6679256 | 0.00918 | ** | NA |
| USP32 | 0.0092096 | 0.6679256 | 0.00921 | ** | NA |
| DNAJC8 | 0.0093058 | 0.6679256 | 0.00931 | ** | NA |
| C20orf26 | 0.0093466 | 0.6679256 | 0.00935 | ** | NA |
| KRTAP4-4 | 0.0093494 | 0.6679256 | 0.00935 | ** | NA |
| ACHE | 0.0093636 | 0.6679256 | 0.00936 | ** | NA |
| LCTL | 0.0093837 | 0.6679256 | 0.00938 | ** | NA |
| SLC25A4 | 0.0094344 | 0.6679256 | 0.00943 | ** | NA |
| LY6G6F | 0.0095144 | 0.6679256 | 0.00951 | ** | NA |
| ZNF330 | 0.0095171 | 0.6679256 | 0.00952 | ** | NA |
| NECAB1 | 0.0095689 | 0.6679256 | 0.00957 | ** | NA |
| PPM1A | 0.0095782 | 0.6679256 | 0.00958 | ** | NA |
| GMCL1 | 0.0096195 | 0.6679256 | 0.00962 | ** | NA |
| C16orf58 | 0.0096729 | 0.6679256 | 0.00967 | ** | NA |
| ZBTB44 | 0.0096904 | 0.6679256 | 0.00969 | ** | NA |
| INHBA | 0.0097282 | 0.6679256 | 0.00973 | ** | NA |
| GLG1 | 0.0097693 | 0.6679256 | 0.00977 | ** | NA |
| RNF31 | 0.0098119 | 0.6679256 | 0.00981 | ** | NA |
| COPS3 | 0.0098996 | 0.6679256 | 0.00990 | ** | NA |
| NOL6 | 0.0099717 | 0.6679256 | 0.00997 | ** | NA |
| WDR77 | 0.0099778 | 0.6679256 | 0.00998 | ** | NA |
# Overlaid histograms of the unadjusted (`p`) and BH-adjusted (`p.adj`)
# p-values in `crispr_signif_allmut_delmis`.
# Each layer maps `fill` to a constant string so that it gets its own legend
# key; scale_fill_manual() then turns those keys into colours and labels.
wilcox_gene_allmut_delmis_plot <- ggplot(data = crispr_signif_allmut_delmis) +
  geom_histogram(aes(x = p, fill = "chartreuse4"), breaks = seq(0, 1, by = 0.025), color = "black", alpha = 0.7) +
  geom_histogram(aes(x = p.adj, fill = "darkslategray3"), breaks = seq(0, 1, by = 0.025), color = "black", alpha = 0.7) +
  scale_x_continuous(breaks = seq(0, 1, by = 0.05), labels = seq(0, 1, by = 0.05)) +
  scale_fill_manual(
    name = "P-values",
    values = c("chartreuse4" = "chartreuse4", "darkslategray3" = "darkslategray3"),
    # Pin the key order so the labels cannot silently drift away from the
    # colours if a fill key is ever renamed.
    breaks = c("chartreuse4", "darkslategray3"),
    labels = c("Unadjusted", "BH-adjusted")
  ) +
  theme(axis.text.x = element_text(angle = 45, hjust = 1), legend.position = c(0.1, 0.85)) +
  # The x-axis carries both the unadjusted and the adjusted values, so it is
  # labelled generically; the legend distinguishes the two series.
  labs(x = "P-value", y = "Frequency")
wilcox_gene_allmut_delmis_plot
# Wilcoxon test of Score against Mutation_Status_Deleterious, run separately
# for every (Hugo_Symbol, group_general_lineage_name) combination, with
# BH-adjusted p-values. The result is passed through adj_signif() (defined
# elsewhere in this file; presumably it adds the `p.signif.adj` column shown
# in the table below -- confirm) and then sorted by ascending unadjusted
# p-value.
crispr_signif_allmut_del_lineage <- compare_means(
  formula = Score ~ Mutation_Status_Deleterious,
  data = crispr_data,
  method = "wilcox.test",
  p.adjust.method = "BH",
  group.by = c("Hugo_Symbol", "group_general_lineage_name")
) %>%
  adj_signif() %>%
  .[order(.[["p"]]), ]

# Cache the sorted results on disk, then read them back from the cache.
saveRDS(
  crispr_signif_allmut_del_lineage,
  "./data_munging/rds/crispr_signif_allmut_deleterious_lineage.rds"
)
# write.table(crispr_signif_allmut_del_lineage, file = "~/Desktop/crispr_signif_allmut_deleterious_lineage.csv", quote = FALSE, sep = ",", row.names = FALSE)
crispr_signif_allmut_del_lineage <- readRDS(
  "./data_munging/rds/crispr_signif_allmut_deleterious_lineage.rds"
)

# Render the rows with unadjusted p < 0.01 as a styled, scrollable table.
crispr_signif_allmut_del_lineage %>%
  filter(p < 0.01) %>%
  .[, c("Hugo_Symbol", "group_general_lineage_name", "p", "p.adj", "p.format", "p.signif", "p.signif.adj")] %>%
  knitr::kable(caption = "Wilcoxon test results comparing deleterious mutant vs other cell lines by lineage, p < 0.01 (BH-adjusted p-values: * p <= 0.05, ** p <= 0.01, *** p <= 0.001, **** p <= 0.0001)") %>%
  kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive")) %>%
  scroll_box(width = "900px", height = "450px")
| Hugo_Symbol | group_general_lineage_name | p | p.adj | p.format | p.signif | p.signif.adj |
|---|---|---|---|---|---|---|
| PTEN | central nervous system cancer | 0.0006658 | 0.8331843 | 0.00067 | *** | NA |
| TP53 | lung cancer | 0.0006718 | 0.8331843 | 0.00067 | *** | NA |
| ZNF141 | central nervous system cancer | 0.0031757 | 0.8331843 | 0.00318 | ** | NA |
| DCAF8 | lung cancer | 0.0033009 | 0.8331843 | 0.00330 | ** | NA |
| PTEN | ovarian cancer | 0.0048168 | 0.8331843 | 0.00482 | ** | NA |
| ARID1B | ovarian cancer | 0.0054869 | 0.8331843 | 0.00549 | ** | NA |
| IFT122 | lung cancer | 0.0055037 | 0.8331843 | 0.00550 | ** | NA |
| SETD2 | kidney cancer | 0.0056647 | 0.8331843 | 0.00566 | ** | NA |
| KRT17 | breast cancer | 0.0063661 | 0.8331843 | 0.00637 | ** | NA |
| DVL2 | uterine cancer | 0.0067676 | 0.8331843 | 0.00677 | ** | NA |
| ARID1A | pancreatic cancer | 0.0076726 | 0.8331843 | 0.00767 | ** | NA |
| MCPH1 | lung cancer | 0.0077467 | 0.8331843 | 0.00775 | ** | NA |
| COX4I1 | lung cancer | 0.0091372 | 0.8331843 | 0.00914 | ** | NA |
| ZNF343 | lung cancer | 0.0098486 | 0.8331843 | 0.00985 | ** | NA |
# Overlaid histograms of the unadjusted (`p`) and BH-adjusted (`p.adj`)
# p-values in `crispr_signif_allmut_del_lineage`.
# Each layer maps `fill` to a constant string so that it gets its own legend
# key; scale_fill_manual() then turns those keys into colours and labels.
wilcox_lineage_allmut_plot <- ggplot(data = crispr_signif_allmut_del_lineage) +
  geom_histogram(aes(x = p, fill = "chartreuse4"), breaks = seq(0, 1, by = 0.025), color = "black", alpha = 0.7) +
  geom_histogram(aes(x = p.adj, fill = "darkslategray3"), breaks = seq(0, 1, by = 0.025), color = "black", alpha = 0.7) +
  scale_x_continuous(breaks = seq(0, 1, by = 0.05), labels = seq(0, 1, by = 0.05)) +
  scale_fill_manual(
    name = "P-values",
    values = c("chartreuse4" = "chartreuse4", "darkslategray3" = "darkslategray3"),
    # Pin the key order so the labels cannot silently drift away from the
    # colours if a fill key is ever renamed.
    breaks = c("chartreuse4", "darkslategray3"),
    labels = c("Unadjusted", "BH-adjusted")
  ) +
  theme(axis.text.x = element_text(angle = 45, hjust = 1), legend.position = c(0.1, 0.85)) +
  # The x-axis carries both the unadjusted and the adjusted values, so it is
  # labelled generically; the legend distinguishes the two series.
  labs(x = "P-value", y = "Frequency")
wilcox_lineage_allmut_plot
# Wilcoxon test of Score against Mutation_Status_Nonsilent, run separately
# for every (Hugo_Symbol, group_general_lineage_name) combination, with
# BH-adjusted p-values. The result is passed through adj_signif() (defined
# elsewhere in this file; presumably it adds the `p.signif.adj` column shown
# in the table below -- confirm) and then sorted by ascending unadjusted
# p-value.
crispr_signif_allmut_nonsilent_lineage <- compare_means(
  formula = Score ~ Mutation_Status_Nonsilent,
  data = crispr_data,
  method = "wilcox.test",
  p.adjust.method = "BH",
  group.by = c("Hugo_Symbol", "group_general_lineage_name")
) %>%
  adj_signif() %>%
  .[order(.[["p"]]), ]

# Cache the sorted results on disk, then read them back from the cache.
saveRDS(
  crispr_signif_allmut_nonsilent_lineage,
  "./data_munging/rds/crispr_signif_allmut_nonsilent_lineage.rds"
)
# write.table(crispr_signif_allmut_nonsilent_lineage, file = "~/Desktop/crispr_signif_allmut_nonsilent_lineage.csv", quote = FALSE, sep = ",", row.names = FALSE)
crispr_signif_allmut_nonsilent_lineage <- readRDS(
  "./data_munging/rds/crispr_signif_allmut_nonsilent_lineage.rds"
)

# Render the rows with unadjusted p < 0.01 as a styled, scrollable table.
crispr_signif_allmut_nonsilent_lineage %>%
  filter(p < 0.01) %>%
  .[, c("Hugo_Symbol", "group_general_lineage_name", "p", "p.adj", "p.format", "p.signif", "p.signif.adj")] %>%
  knitr::kable(caption = "Wilcoxon test results comparing non-silent mutant vs other cell lines by lineage, p < 0.01 (BH-adjusted p-values: * p <= 0.05, ** p <= 0.01, *** p <= 0.001, **** p <= 0.0001)") %>%
  kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive")) %>%
  scroll_box(width = "900px", height = "450px")
| Hugo_Symbol | group_general_lineage_name | p | p.adj | p.format | p.signif | p.signif.adj |
|---|---|---|---|---|---|---|
| KRAS | lung cancer | 0.0000000 | 0.0000734 | 5.9e-10 | **** | **** |
| TP53 | lung cancer | 0.0000047 | 0.2961085 | 4.7e-06 | **** | NA |
| TP53 | central nervous system cancer | 0.0000302 | 0.8628420 | 3.0e-05 | **** | NA |
| TP53 | ovarian cancer | 0.0001395 | 0.8628420 | 0.00014 | *** | NA |
| KRAS | ovarian cancer | 0.0001488 | 0.8628420 | 0.00015 | *** | NA |
| NRAS | leukemia | 0.0005316 | 0.8628420 | 0.00053 | *** | NA |
| TP53 | leukemia | 0.0005388 | 0.8628420 | 0.00054 | *** | NA |
| NRAS | skin cancer | 0.0006117 | 0.8628420 | 0.00061 | *** | NA |
| TP53 | skin cancer | 0.0008277 | 0.8628420 | 0.00083 | *** | NA |
| PIK3CA | ovarian cancer | 0.0008371 | 0.8628420 | 0.00084 | *** | NA |
| DZANK1 | breast cancer | 0.0009819 | 0.8628420 | 0.00098 | *** | NA |
| PIK3CA | breast cancer | 0.0009842 | 0.8628420 | 0.00098 | *** | NA |
| KRAS | colorectal cancer | 0.0010547 | 0.8628420 | 0.00105 | ** | NA |
| UNC45B | lung cancer | 0.0011215 | 0.8628420 | 0.00112 | ** | NA |
| VHL | kidney cancer | 0.0011846 | 0.8628420 | 0.00118 | ** | NA |
| KRT17 | breast cancer | 0.0012972 | 0.8628420 | 0.00130 | ** | NA |
| PIK3R1 | central nervous system cancer | 0.0013246 | 0.8628420 | 0.00132 | ** | NA |
| GOLGA3 | uterine cancer | 0.0013666 | 0.8628420 | 0.00137 | ** | NA |
| NRAS | multiple myeloma | 0.0013756 | 0.8628420 | 0.00138 | ** | NA |
| GTF3C1 | lung cancer | 0.0013893 | 0.8628420 | 0.00139 | ** | NA |
| TICRR | uterine cancer | 0.0014839 | 0.8628420 | 0.00148 | ** | NA |
| ARID1A | pancreatic cancer | 0.0014920 | 0.8628420 | 0.00149 | ** | NA |
| TP53 | kidney cancer | 0.0016347 | 0.8628420 | 0.00163 | ** | NA |
| HYDIN | colorectal cancer | 0.0017600 | 0.8628420 | 0.00176 | ** | NA |
| PTEN | ovarian cancer | 0.0018079 | 0.8628420 | 0.00181 | ** | NA |
| HYOU1 | colorectal cancer | 0.0018107 | 0.8628420 | 0.00181 | ** | NA |
| USP34 | breast cancer | 0.0018820 | 0.8628420 | 0.00188 | ** | NA |
| MGA | uterine cancer | 0.0019495 | 0.8628420 | 0.00195 | ** | NA |
| SVOPL | lung cancer | 0.0019973 | 0.8628420 | 0.00200 | ** | NA |
| LILRB2 | leukemia | 0.0021670 | 0.8628420 | 0.00217 | ** | NA |
| PIK3R1 | ovarian cancer | 0.0021769 | 0.8628420 | 0.00218 | ** | NA |
| KMT2B | stomach cancer | 0.0022800 | 0.8628420 | 0.00228 | ** | NA |
| TNRC6A | lung cancer | 0.0023744 | 0.8628420 | 0.00237 | ** | NA |
| COX4I1 | lung cancer | 0.0023949 | 0.8628420 | 0.00239 | ** | NA |
| SNX29 | uterine cancer | 0.0026104 | 0.8628420 | 0.00261 | ** | NA |
| KIF3B | ovarian cancer | 0.0026230 | 0.8628420 | 0.00262 | ** | NA |
| KMT2B | breast cancer | 0.0026433 | 0.8628420 | 0.00264 | ** | NA |
| GSPT1 | colorectal cancer | 0.0028115 | 0.8628420 | 0.00281 | ** | NA |
| BRAF | ovarian cancer | 0.0028709 | 0.8628420 | 0.00287 | ** | NA |
| AP1G2 | breast cancer | 0.0029835 | 0.8628420 | 0.00298 | ** | NA |
| ZNF264 | colorectal cancer | 0.0029879 | 0.8628420 | 0.00299 | ** | NA |
| ZNF141 | central nervous system cancer | 0.0031757 | 0.8628420 | 0.00318 | ** | NA |
| KMT2B | ovarian cancer | 0.0032684 | 0.8628420 | 0.00327 | ** | NA |
| KIAA0586 | colorectal cancer | 0.0032838 | 0.8628420 | 0.00328 | ** | NA |
| ANK2 | colorectal cancer | 0.0033597 | 0.8628420 | 0.00336 | ** | NA |
| RPS19BP1 | multiple myeloma | 0.0034613 | 0.8628420 | 0.00346 | ** | NA |
| TGS1 | uterine cancer | 0.0035864 | 0.8628420 | 0.00359 | ** | NA |
| COL2A1 | leukemia | 0.0036054 | 0.8628420 | 0.00361 | ** | NA |
| SLIT3 | leukemia | 0.0036054 | 0.8628420 | 0.00361 | ** | NA |
| TP53 | colorectal cancer | 0.0036994 | 0.8628420 | 0.00370 | ** | NA |
| HTR7 | lung cancer | 0.0037248 | 0.8628420 | 0.00372 | ** | NA |
| PAPPA | uterine cancer | 0.0037971 | 0.8628420 | 0.00380 | ** | NA |
| ZNF292 | lung cancer | 0.0038193 | 0.8628420 | 0.00382 | ** | NA |
| PTEN | central nervous system cancer | 0.0038648 | 0.8628420 | 0.00386 | ** | NA |
| KRAS | stomach cancer | 0.0040001 | 0.8628420 | 0.00400 | ** | NA |
| MTMR3 | lung cancer | 0.0041192 | 0.8628420 | 0.00412 | ** | NA |
| TSKS | uterine cancer | 0.0042550 | 0.8628420 | 0.00425 | ** | NA |
| ARHGAP12 | colorectal cancer | 0.0042860 | 0.8628420 | 0.00429 | ** | NA |
| PAM | uterine cancer | 0.0043488 | 0.8628420 | 0.00435 | ** | NA |
| PDE10A | uterine cancer | 0.0043488 | 0.8628420 | 0.00435 | ** | NA |
| BRAF | skin cancer | 0.0043710 | 0.8628420 | 0.00437 | ** | NA |
| ECT2L | central nervous system cancer | 0.0044085 | 0.8628420 | 0.00441 | ** | NA |
| MAML2 | uterine cancer | 0.0044510 | 0.8628420 | 0.00445 | ** | NA |
| ARID1B | ovarian cancer | 0.0046380 | 0.8628420 | 0.00464 | ** | NA |
| NCOR2 | colorectal cancer | 0.0046575 | 0.8628420 | 0.00466 | ** | NA |
| NLRP9 | lung cancer | 0.0046872 | 0.8628420 | 0.00469 | ** | NA |
| LRP4 | uterine cancer | 0.0046963 | 0.8628420 | 0.00470 | ** | NA |
| VAV3 | lung cancer | 0.0047769 | 0.8628420 | 0.00478 | ** | NA |
| NUP88 | uterine cancer | 0.0048888 | 0.8628420 | 0.00489 | ** | NA |
| LRRIQ1 | central nervous system cancer | 0.0049271 | 0.8628420 | 0.00493 | ** | NA |
| PTEN | uterine cancer | 0.0050051 | 0.8628420 | 0.00501 | ** | NA |
| AXIN1 | ovarian cancer | 0.0050354 | 0.8628420 | 0.00504 | ** | NA |
| PIK3CA | colorectal cancer | 0.0050602 | 0.8628420 | 0.00506 | ** | NA |
| TNK2 | uterine cancer | 0.0050864 | 0.8628420 | 0.00509 | ** | NA |
| DIP2C | kidney cancer | 0.0052747 | 0.8628420 | 0.00527 | ** | NA |
| CTAGE15 | lung cancer | 0.0053375 | 0.8628420 | 0.00534 | ** | NA |
| LSP1 | colorectal cancer | 0.0053452 | 0.8628420 | 0.00535 | ** | NA |
| CORIN | colorectal cancer | 0.0054756 | 0.8628420 | 0.00548 | ** | NA |
| SETD2 | kidney cancer | 0.0056647 | 0.8628420 | 0.00566 | ** | NA |
| FNDC7 | colorectal cancer | 0.0057059 | 0.8628420 | 0.00571 | ** | NA |
| SKOR1 | uterine cancer | 0.0057795 | 0.8628420 | 0.00578 | ** | NA |
| PARD3B | uterine cancer | 0.0059302 | 0.8628420 | 0.00593 | ** | NA |
| FNIP1 | ovarian cancer | 0.0060557 | 0.8628420 | 0.00606 | ** | NA |
| AP2A2 | breast cancer | 0.0061570 | 0.8628420 | 0.00616 | ** | NA |
| ASXL1 | colorectal cancer | 0.0061648 | 0.8628420 | 0.00616 | ** | NA |
| CCT8L2 | colorectal cancer | 0.0061648 | 0.8628420 | 0.00616 | ** | NA |
| MTMR14 | colorectal cancer | 0.0061648 | 0.8628420 | 0.00616 | ** | NA |
| TTC32 | uterine cancer | 0.0062881 | 0.8628420 | 0.00629 | ** | NA |
| NR4A1 | lung cancer | 0.0064934 | 0.8628420 | 0.00649 | ** | NA |
| SLC22A17 | lung cancer | 0.0064934 | 0.8628420 | 0.00649 | ** | NA |
| FAM208B | colorectal cancer | 0.0065065 | 0.8628420 | 0.00651 | ** | NA |
| ROCK1 | colorectal cancer | 0.0066106 | 0.8628420 | 0.00661 | ** | NA |
| ZNF521 | colorectal cancer | 0.0066975 | 0.8628420 | 0.00670 | ** | NA |
| RFWD3 | lung cancer | 0.0067488 | 0.8628420 | 0.00675 | ** | NA |
| TYRP1 | lung cancer | 0.0067488 | 0.8628420 | 0.00675 | ** | NA |
| ADRBK2 | uterine cancer | 0.0067676 | 0.8628420 | 0.00677 | ** | NA |
| DVL2 | uterine cancer | 0.0067676 | 0.8628420 | 0.00677 | ** | NA |
| EXOC8 | uterine cancer | 0.0067676 | 0.8628420 | 0.00677 | ** | NA |
| VPS51 | pancreatic cancer | 0.0067999 | 0.8628420 | 0.00680 | ** | NA |
| ITSN1 | colorectal cancer | 0.0069403 | 0.8628420 | 0.00694 | ** | NA |
| ZNF292 | colorectal cancer | 0.0069403 | 0.8628420 | 0.00694 | ** | NA |
| OR5M8 | colorectal cancer | 0.0070184 | 0.8628420 | 0.00702 | ** | NA |
| C10orf76 | ovarian cancer | 0.0070773 | 0.8628420 | 0.00708 | ** | NA |
| DDX11 | lung cancer | 0.0070952 | 0.8628420 | 0.00710 | ** | NA |
| KMT2C | multiple myeloma | 0.0071541 | 0.8628420 | 0.00715 | ** | NA |
| MYH6 | uterine cancer | 0.0071786 | 0.8628420 | 0.00718 | ** | NA |
| SURF6 | uterine cancer | 0.0073058 | 0.8628420 | 0.00731 | ** | NA |
| GLP2R | lung cancer | 0.0073425 | 0.8628420 | 0.00734 | ** | NA |
| C1orf86 | leukemia | 0.0073710 | 0.8628420 | 0.00737 | ** | NA |
| TEX10 | ovarian cancer | 0.0075601 | 0.8628420 | 0.00756 | ** | NA |
| ALPK3 | ovarian cancer | 0.0076300 | 0.8628420 | 0.00763 | ** | NA |
| MYOM3 | lung cancer | 0.0076435 | 0.8628420 | 0.00764 | ** | NA |
| ALPPL2 | ovarian cancer | 0.0077480 | 0.8628420 | 0.00775 | ** | NA |
| PNPLA5 | colorectal cancer | 0.0077740 | 0.8628420 | 0.00777 | ** | NA |
| PTPRN2 | colorectal cancer | 0.0078299 | 0.8628420 | 0.00783 | ** | NA |
| PTGFRN | lung cancer | 0.0078691 | 0.8628420 | 0.00787 | ** | NA |
| PCDH10 | uterine cancer | 0.0078925 | 0.8628420 | 0.00789 | ** | NA |
| ANKRD23 | colorectal cancer | 0.0078965 | 0.8628420 | 0.00790 | ** | NA |
| RB1 | central nervous system cancer | 0.0079424 | 0.8628420 | 0.00794 | ** | NA |
| HERC1 | skin cancer | 0.0079501 | 0.8628420 | 0.00795 | ** | NA |
| RANBP2 | colorectal cancer | 0.0079744 | 0.8628420 | 0.00797 | ** | NA |
| SCFD1 | colorectal cancer | 0.0079828 | 0.8628420 | 0.00798 | ** | NA |
| C12orf4 | uterine cancer | 0.0080215 | 0.8628420 | 0.00802 | ** | NA |
| EGFLAM | lung cancer | 0.0081906 | 0.8628420 | 0.00819 | ** | NA |
| FAM160B1 | ovarian cancer | 0.0082030 | 0.8628420 | 0.00820 | ** | NA |
| SPRED1 | lung cancer | 0.0084512 | 0.8628420 | 0.00845 | ** | NA |
| MROH1 | pancreatic cancer | 0.0084519 | 0.8628420 | 0.00845 | ** | NA |
| SBF2 | lung cancer | 0.0084541 | 0.8628420 | 0.00845 | ** | NA |
| EFCAB5 | ovarian cancer | 0.0084624 | 0.8628420 | 0.00846 | ** | NA |
| THSD7A | lung cancer | 0.0085271 | 0.8628420 | 0.00853 | ** | NA |
| C6orf211 | uterine cancer | 0.0085754 | 0.8628420 | 0.00858 | ** | NA |
| CSNK1D | uterine cancer | 0.0085754 | 0.8628420 | 0.00858 | ** | NA |
| FAM110A | uterine cancer | 0.0085754 | 0.8628420 | 0.00858 | ** | NA |
| MICAL1 | uterine cancer | 0.0085754 | 0.8628420 | 0.00858 | ** | NA |
| MORF4L1 | uterine cancer | 0.0085754 | 0.8628420 | 0.00858 | ** | NA |
| RIOK2 | uterine cancer | 0.0085754 | 0.8628420 | 0.00858 | ** | NA |
| GAA | colorectal cancer | 0.0085960 | 0.8628420 | 0.00860 | ** | NA |
| KIAA1107 | colorectal cancer | 0.0085960 | 0.8628420 | 0.00860 | ** | NA |
| ZNF536 | skin cancer | 0.0086987 | 0.8628420 | 0.00870 | ** | NA |
| RPS6KA2 | uterine cancer | 0.0087779 | 0.8628420 | 0.00878 | ** | NA |
| NF1 | peripheral nervous system neoplasm | 0.0088143 | 0.8628420 | 0.00881 | ** | NA |
| TPR | lung cancer | 0.0088831 | 0.8628420 | 0.00888 | ** | NA |
| NFE2L2 | esophageal cancer | 0.0088890 | 0.8628420 | 0.00889 | ** | NA |
| TECRL | lung cancer | 0.0089331 | 0.8628420 | 0.00893 | ** | NA |
| CLEC9A | liver cancer | 0.0090085 | 0.8628420 | 0.00901 | ** | NA |
| SVEP1 | skin cancer | 0.0090238 | 0.8628420 | 0.00902 | ** | NA |
| DPYSL5 | uterine cancer | 0.0091517 | 0.8628420 | 0.00915 | ** | NA |
| FAM189A1 | uterine cancer | 0.0091517 | 0.8628420 | 0.00915 | ** | NA |
| HSPB2 | uterine cancer | 0.0091517 | 0.8628420 | 0.00915 | ** | NA |
| IRX6 | uterine cancer | 0.0091517 | 0.8628420 | 0.00915 | ** | NA |
| XKR9 | uterine cancer | 0.0091517 | 0.8628420 | 0.00915 | ** | NA |
| KIAA0922 | lung cancer | 0.0091532 | 0.8628420 | 0.00915 | ** | NA |
| STK17B | lung cancer | 0.0092125 | 0.8628420 | 0.00921 | ** | NA |
| TTF1 | lung cancer | 0.0092125 | 0.8628420 | 0.00921 | ** | NA |
| SYNE1 | lung cancer | 0.0092324 | 0.8628420 | 0.00923 | ** | NA |
| TBC1D10B | uterine cancer | 0.0092568 | 0.8628420 | 0.00926 | ** | NA |
| DNAJC8 | uterine cancer | 0.0092717 | 0.8628420 | 0.00927 | ** | NA |
| PKDCC | uterine cancer | 0.0092717 | 0.8628420 | 0.00927 | ** | NA |
| TCF3 | uterine cancer | 0.0092717 | 0.8628420 | 0.00927 | ** | NA |
| ZNF207 | uterine cancer | 0.0093098 | 0.8628420 | 0.00931 | ** | NA |
| RASIP1 | colorectal cancer | 0.0093199 | 0.8628420 | 0.00932 | ** | NA |
| AP5B1 | uterine cancer | 0.0093215 | 0.8628420 | 0.00932 | ** | NA |
| RAD50 | leukemia | 0.0094090 | 0.8628420 | 0.00941 | ** | NA |
| DNAH5 | colorectal cancer | 0.0095928 | 0.8628420 | 0.00959 | ** | NA |
| TJP3 | head and neck cancer | 0.0096224 | 0.8628420 | 0.00962 | ** | NA |
| NRP2 | colorectal cancer | 0.0096226 | 0.8628420 | 0.00962 | ** | NA |
| INTS4 | uterine cancer | 0.0096407 | 0.8628420 | 0.00964 | ** | NA |
| TCHH | liver cancer | 0.0096613 | 0.8628420 | 0.00966 | ** | NA |
| TTF1 | leukemia | 0.0097690 | 0.8628420 | 0.00977 | ** | NA |
| ZNF343 | lung cancer | 0.0098486 | 0.8628420 | 0.00985 | ** | NA |
| ZNF469 | uterine cancer | 0.0099581 | 0.8628420 | 0.00996 | ** | NA |
| IPO4 | uterine cancer | 0.0099688 | 0.8628420 | 0.00997 | ** | NA |
# Overlaid histograms of the unadjusted (`p`) and BH-adjusted (`p.adj`)
# Wilcoxon p-values from the non-silent, per-lineage comparison.
# The constant strings inside aes() are legend keys, not colours; the actual
# colours and the legend labels are assigned in scale_fill_manual().
wilcox_lineage_allmut_nonsilent_plot <- ggplot(data = crispr_signif_allmut_nonsilent_lineage) +
  geom_histogram(aes(x = p, fill = "chartreuse4"), breaks = seq(0, 1, by = 0.025), color = "black", alpha = 0.7) +
  geom_histogram(aes(x = p.adj, fill = "darkslategray3"), breaks = seq(0, 1, by = 0.025), color = "black", alpha = 0.7) +
  scale_x_continuous(breaks = seq(0, 1, by = 0.05), labels = seq(0, 1, by = 0.05)) +
  # `breaks` pins the key order so each label is guaranteed to sit next to the
  # intended colour instead of relying on alphabetical ordering of the keys.
  scale_fill_manual(
    name = "P-values",
    values = c("chartreuse4" = "chartreuse4", "darkslategray3" = "darkslategray3"),
    breaks = c("chartreuse4", "darkslategray3"),
    labels = c("Unadjusted", "BH-adjusted")
  ) +
  theme(axis.text.x = element_text(angle = 45, hjust = 1), legend.position = c(0.1, 0.85)) +
  # The axis carries both unadjusted and adjusted values, so it is labelled
  # generically; the legend distinguishes the two series.
  labs(x = "P-value", y = "Frequency")
wilcox_lineage_allmut_nonsilent_plot
# Wilcoxon tests of Score by deleterious/missense mutation status, run
# separately for every (gene, lineage) group, with BH-adjusted p-values.
crispr_signif_allmut_delmis_lineage <- compare_means(
  Score ~ Mutation_Status_DeleteriousMissense,
  group.by = c("Hugo_Symbol", "group_general_lineage_name"),
  data = crispr_data,
  method = "wilcox.test",
  p.adjust.method = "BH"
)
# adj_signif() is defined elsewhere in this file; the table below expects it to
# provide the `p.signif.adj` column.
crispr_signif_allmut_delmis_lineage <- adj_signif(crispr_signif_allmut_delmis_lineage)
# Sort by ascending unadjusted p-value.
crispr_signif_allmut_delmis_lineage <- crispr_signif_allmut_delmis_lineage[order(crispr_signif_allmut_delmis_lineage$p), ]
# NOTE(review): the cache file name is spelled "deleteriousmissens" (no final
# "e"). It is left as-is because the save and the read use the same spelling
# and an existing cache may already be on disk under this name.
saveRDS(crispr_signif_allmut_delmis_lineage, "./data_munging/rds/crispr_signif_allmut_deleteriousmissens_lineage.rds")
# write.table(crispr_signif_allmut_delmis_lineage, file = "~/Desktop/crispr_signif_allmut_deleteriousmissense_lineage.csv", quote = FALSE, sep = ",", row.names = FALSE)
crispr_signif_allmut_delmis_lineage <- readRDS("./data_munging/rds/crispr_signif_allmut_deleteriousmissens_lineage.rds")
# Caption corrected: the test compares deleterious/missense mutant lines
# against all other lines within each lineage (matching the non-silent table).
knitr::kable(
  filter(crispr_signif_allmut_delmis_lineage, p < 0.01)[, c("Hugo_Symbol", "group_general_lineage_name", "p", "p.adj", "p.format", "p.signif", "p.signif.adj")],
  caption = "Wilcoxon test results comparing deleterious/missense mutant vs other cell lines by lineage, p < 0.01 (BH-adjusted p-values: * p <= 0.05, ** p <= 0.01, *** p <= 0.001, **** p <= 0.0001)"
) %>%
  kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive")) %>%
  scroll_box(width = "900px", height = "450px")
| Hugo_Symbol | group_general_lineage_name | p | p.adj | p.format | p.signif | p.signif.adj |
|---|---|---|---|---|---|---|
| KRAS | lung cancer | 0.0000000 | 0.0000729 | 5.9e-10 | **** | **** |
| TP53 | lung cancer | 0.0000104 | 0.6484491 | 1.0e-05 | **** | NA |
| TP53 | ovarian cancer | 0.0001395 | 0.8627299 | 0.00014 | *** | NA |
| KRAS | ovarian cancer | 0.0001488 | 0.8627299 | 0.00015 | *** | NA |
| TP53 | central nervous system cancer | 0.0004398 | 0.8627299 | 0.00044 | *** | NA |
| NRAS | leukemia | 0.0005316 | 0.8627299 | 0.00053 | *** | NA |
| TP53 | leukemia | 0.0005388 | 0.8627299 | 0.00054 | *** | NA |
| NRAS | skin cancer | 0.0006117 | 0.8627299 | 0.00061 | *** | NA |
| TP53 | skin cancer | 0.0008277 | 0.8627299 | 0.00083 | *** | NA |
| PIK3CA | ovarian cancer | 0.0008371 | 0.8627299 | 0.00084 | *** | NA |
| DZANK1 | breast cancer | 0.0009819 | 0.8627299 | 0.00098 | *** | NA |
| PIK3CA | breast cancer | 0.0009842 | 0.8627299 | 0.00098 | *** | NA |
| KRAS | colorectal cancer | 0.0010547 | 0.8627299 | 0.00105 | ** | NA |
| UNC45B | lung cancer | 0.0011215 | 0.8627299 | 0.00112 | ** | NA |
| KRT17 | breast cancer | 0.0012972 | 0.8627299 | 0.00130 | ** | NA |
| GOLGA3 | uterine cancer | 0.0013666 | 0.8627299 | 0.00137 | ** | NA |
| NRAS | multiple myeloma | 0.0013756 | 0.8627299 | 0.00138 | ** | NA |
| GTF3C1 | lung cancer | 0.0013893 | 0.8627299 | 0.00139 | ** | NA |
| TICRR | uterine cancer | 0.0014839 | 0.8627299 | 0.00148 | ** | NA |
| ARID1A | pancreatic cancer | 0.0014920 | 0.8627299 | 0.00149 | ** | NA |
| HYOU1 | colorectal cancer | 0.0018107 | 0.8627299 | 0.00181 | ** | NA |
| USP34 | breast cancer | 0.0018820 | 0.8627299 | 0.00188 | ** | NA |
| MGA | uterine cancer | 0.0019495 | 0.8627299 | 0.00195 | ** | NA |
| SVOPL | lung cancer | 0.0019973 | 0.8627299 | 0.00200 | ** | NA |
| KMT2B | stomach cancer | 0.0022800 | 0.8627299 | 0.00228 | ** | NA |
| ARID1B | ovarian cancer | 0.0022924 | 0.8627299 | 0.00229 | ** | NA |
| TNRC6A | lung cancer | 0.0023744 | 0.8627299 | 0.00237 | ** | NA |
| SNX29 | uterine cancer | 0.0026104 | 0.8627299 | 0.00261 | ** | NA |
| KIF3B | ovarian cancer | 0.0026230 | 0.8627299 | 0.00262 | ** | NA |
| KMT2B | breast cancer | 0.0026433 | 0.8627299 | 0.00264 | ** | NA |
| GSPT1 | colorectal cancer | 0.0028115 | 0.8627299 | 0.00281 | ** | NA |
| AP1G2 | breast cancer | 0.0029835 | 0.8627299 | 0.00298 | ** | NA |
| ZNF264 | colorectal cancer | 0.0029879 | 0.8627299 | 0.00299 | ** | NA |
| ZNF141 | central nervous system cancer | 0.0031757 | 0.8627299 | 0.00318 | ** | NA |
| KMT2B | ovarian cancer | 0.0032684 | 0.8627299 | 0.00327 | ** | NA |
| KIAA0586 | colorectal cancer | 0.0032838 | 0.8627299 | 0.00328 | ** | NA |
| ANK2 | colorectal cancer | 0.0033597 | 0.8627299 | 0.00336 | ** | NA |
| RPS19BP1 | multiple myeloma | 0.0034613 | 0.8627299 | 0.00346 | ** | NA |
| TGS1 | uterine cancer | 0.0035864 | 0.8627299 | 0.00359 | ** | NA |
| COL2A1 | leukemia | 0.0036054 | 0.8627299 | 0.00361 | ** | NA |
| SLIT3 | leukemia | 0.0036054 | 0.8627299 | 0.00361 | ** | NA |
| TP53 | colorectal cancer | 0.0036994 | 0.8627299 | 0.00370 | ** | NA |
| HTR7 | lung cancer | 0.0037248 | 0.8627299 | 0.00372 | ** | NA |
| PAPPA | uterine cancer | 0.0037971 | 0.8627299 | 0.00380 | ** | NA |
| ZNF292 | lung cancer | 0.0038193 | 0.8627299 | 0.00382 | ** | NA |
| PTEN | central nervous system cancer | 0.0038648 | 0.8627299 | 0.00386 | ** | NA |
| PTEN | ovarian cancer | 0.0039517 | 0.8627299 | 0.00395 | ** | NA |
| KRAS | stomach cancer | 0.0040001 | 0.8627299 | 0.00400 | ** | NA |
| MTMR3 | lung cancer | 0.0041192 | 0.8627299 | 0.00412 | ** | NA |
| TSKS | uterine cancer | 0.0042550 | 0.8627299 | 0.00425 | ** | NA |
| PTEN | uterine cancer | 0.0042857 | 0.8627299 | 0.00429 | ** | NA |
| ARHGAP12 | colorectal cancer | 0.0042860 | 0.8627299 | 0.00429 | ** | NA |
| PAM | uterine cancer | 0.0043488 | 0.8627299 | 0.00435 | ** | NA |
| PDE10A | uterine cancer | 0.0043488 | 0.8627299 | 0.00435 | ** | NA |
| BRAF | skin cancer | 0.0043710 | 0.8627299 | 0.00437 | ** | NA |
| ECT2L | central nervous system cancer | 0.0044085 | 0.8627299 | 0.00441 | ** | NA |
| VHL | kidney cancer | 0.0046189 | 0.8627299 | 0.00462 | ** | NA |
| NCOR2 | colorectal cancer | 0.0046575 | 0.8627299 | 0.00466 | ** | NA |
| NLRP9 | lung cancer | 0.0046872 | 0.8627299 | 0.00469 | ** | NA |
| LRP4 | uterine cancer | 0.0046963 | 0.8627299 | 0.00470 | ** | NA |
| VAV3 | lung cancer | 0.0047769 | 0.8627299 | 0.00478 | ** | NA |
| NUP88 | uterine cancer | 0.0048888 | 0.8627299 | 0.00489 | ** | NA |
| LRRIQ1 | central nervous system cancer | 0.0049271 | 0.8627299 | 0.00493 | ** | NA |
| AXIN1 | ovarian cancer | 0.0050354 | 0.8627299 | 0.00504 | ** | NA |
| TNK2 | uterine cancer | 0.0050864 | 0.8627299 | 0.00509 | ** | NA |
| DIP2C | kidney cancer | 0.0052747 | 0.8627299 | 0.00527 | ** | NA |
| CTAGE15 | lung cancer | 0.0053375 | 0.8627299 | 0.00534 | ** | NA |
| LSP1 | colorectal cancer | 0.0053452 | 0.8627299 | 0.00535 | ** | NA |
| CORIN | colorectal cancer | 0.0054756 | 0.8627299 | 0.00548 | ** | NA |
| SETD2 | kidney cancer | 0.0056647 | 0.8627299 | 0.00566 | ** | NA |
| FNDC7 | colorectal cancer | 0.0057059 | 0.8627299 | 0.00571 | ** | NA |
| SKOR1 | uterine cancer | 0.0057795 | 0.8627299 | 0.00578 | ** | NA |
| HYDIN | colorectal cancer | 0.0057929 | 0.8627299 | 0.00579 | ** | NA |
| PARD3B | uterine cancer | 0.0059302 | 0.8627299 | 0.00593 | ** | NA |
| FNIP1 | ovarian cancer | 0.0060557 | 0.8627299 | 0.00606 | ** | NA |
| AP2A2 | breast cancer | 0.0061570 | 0.8627299 | 0.00616 | ** | NA |
| ASXL1 | colorectal cancer | 0.0061648 | 0.8627299 | 0.00616 | ** | NA |
| CCT8L2 | colorectal cancer | 0.0061648 | 0.8627299 | 0.00616 | ** | NA |
| MTMR14 | colorectal cancer | 0.0061648 | 0.8627299 | 0.00616 | ** | NA |
| TTC32 | uterine cancer | 0.0062881 | 0.8627299 | 0.00629 | ** | NA |
| NR4A1 | lung cancer | 0.0064934 | 0.8627299 | 0.00649 | ** | NA |
| SLC22A17 | lung cancer | 0.0064934 | 0.8627299 | 0.00649 | ** | NA |
| FAM208B | colorectal cancer | 0.0065065 | 0.8627299 | 0.00651 | ** | NA |
| ROCK1 | colorectal cancer | 0.0066106 | 0.8627299 | 0.00661 | ** | NA |
| ZNF521 | colorectal cancer | 0.0066975 | 0.8627299 | 0.00670 | ** | NA |
| RFWD3 | lung cancer | 0.0067488 | 0.8627299 | 0.00675 | ** | NA |
| TYRP1 | lung cancer | 0.0067488 | 0.8627299 | 0.00675 | ** | NA |
| ADRBK2 | uterine cancer | 0.0067676 | 0.8627299 | 0.00677 | ** | NA |
| DVL2 | uterine cancer | 0.0067676 | 0.8627299 | 0.00677 | ** | NA |
| EXOC8 | uterine cancer | 0.0067676 | 0.8627299 | 0.00677 | ** | NA |
| VPS51 | pancreatic cancer | 0.0067999 | 0.8627299 | 0.00680 | ** | NA |
| ITSN1 | colorectal cancer | 0.0069403 | 0.8627299 | 0.00694 | ** | NA |
| ZNF292 | colorectal cancer | 0.0069403 | 0.8627299 | 0.00694 | ** | NA |
| OR5M8 | colorectal cancer | 0.0070184 | 0.8627299 | 0.00702 | ** | NA |
| C10orf76 | ovarian cancer | 0.0070773 | 0.8627299 | 0.00708 | ** | NA |
| DDX11 | lung cancer | 0.0070952 | 0.8627299 | 0.00710 | ** | NA |
| KMT2C | multiple myeloma | 0.0071541 | 0.8627299 | 0.00715 | ** | NA |
| MYH6 | uterine cancer | 0.0071786 | 0.8627299 | 0.00718 | ** | NA |
| SURF6 | uterine cancer | 0.0073058 | 0.8627299 | 0.00731 | ** | NA |
| PIK3CA | colorectal cancer | 0.0073288 | 0.8627299 | 0.00733 | ** | NA |
| GLP2R | lung cancer | 0.0073425 | 0.8627299 | 0.00734 | ** | NA |
| C1orf86 | leukemia | 0.0073710 | 0.8627299 | 0.00737 | ** | NA |
| TEX10 | ovarian cancer | 0.0075601 | 0.8627299 | 0.00756 | ** | NA |
| ALPK3 | ovarian cancer | 0.0076300 | 0.8627299 | 0.00763 | ** | NA |
| MYOM3 | lung cancer | 0.0076435 | 0.8627299 | 0.00764 | ** | NA |
| ALPPL2 | ovarian cancer | 0.0077480 | 0.8627299 | 0.00775 | ** | NA |
| TP53 | kidney cancer | 0.0077589 | 0.8627299 | 0.00776 | ** | NA |
| PNPLA5 | colorectal cancer | 0.0077740 | 0.8627299 | 0.00777 | ** | NA |
| PTPRN2 | colorectal cancer | 0.0078299 | 0.8627299 | 0.00783 | ** | NA |
| PTGFRN | lung cancer | 0.0078691 | 0.8627299 | 0.00787 | ** | NA |
| PCDH10 | uterine cancer | 0.0078925 | 0.8627299 | 0.00789 | ** | NA |
| ANKRD23 | colorectal cancer | 0.0078965 | 0.8627299 | 0.00790 | ** | NA |
| RB1 | central nervous system cancer | 0.0079424 | 0.8627299 | 0.00794 | ** | NA |
| HERC1 | skin cancer | 0.0079501 | 0.8627299 | 0.00795 | ** | NA |
| SCFD1 | colorectal cancer | 0.0079828 | 0.8627299 | 0.00798 | ** | NA |
| C12orf4 | uterine cancer | 0.0080215 | 0.8627299 | 0.00802 | ** | NA |
| EGFLAM | lung cancer | 0.0081906 | 0.8627299 | 0.00819 | ** | NA |
| SPRED1 | lung cancer | 0.0084512 | 0.8627299 | 0.00845 | ** | NA |
| MROH1 | pancreatic cancer | 0.0084519 | 0.8627299 | 0.00845 | ** | NA |
| SBF2 | lung cancer | 0.0084541 | 0.8627299 | 0.00845 | ** | NA |
| EFCAB5 | ovarian cancer | 0.0084624 | 0.8627299 | 0.00846 | ** | NA |
| C6orf211 | uterine cancer | 0.0085754 | 0.8627299 | 0.00858 | ** | NA |
| CSNK1D | uterine cancer | 0.0085754 | 0.8627299 | 0.00858 | ** | NA |
| FAM110A | uterine cancer | 0.0085754 | 0.8627299 | 0.00858 | ** | NA |
| MICAL1 | uterine cancer | 0.0085754 | 0.8627299 | 0.00858 | ** | NA |
| MORF4L1 | uterine cancer | 0.0085754 | 0.8627299 | 0.00858 | ** | NA |
| RIOK2 | uterine cancer | 0.0085754 | 0.8627299 | 0.00858 | ** | NA |
| GAA | colorectal cancer | 0.0085960 | 0.8627299 | 0.00860 | ** | NA |
| KIAA1107 | colorectal cancer | 0.0085960 | 0.8627299 | 0.00860 | ** | NA |
| ZNF536 | skin cancer | 0.0086987 | 0.8627299 | 0.00870 | ** | NA |
| RPS6KA2 | uterine cancer | 0.0087779 | 0.8627299 | 0.00878 | ** | NA |
| TPR | lung cancer | 0.0088831 | 0.8627299 | 0.00888 | ** | NA |
| SHANK1 | central nervous system cancer | 0.0088878 | 0.8627299 | 0.00889 | ** | NA |
| TECRL | lung cancer | 0.0089331 | 0.8627299 | 0.00893 | ** | NA |
| CLEC9A | liver cancer | 0.0090085 | 0.8627299 | 0.00901 | ** | NA |
| SVEP1 | skin cancer | 0.0090238 | 0.8627299 | 0.00902 | ** | NA |
| COX4I1 | lung cancer | 0.0091372 | 0.8627299 | 0.00914 | ** | NA |
| DPYSL5 | uterine cancer | 0.0091517 | 0.8627299 | 0.00915 | ** | NA |
| FAM189A1 | uterine cancer | 0.0091517 | 0.8627299 | 0.00915 | ** | NA |
| IRX6 | uterine cancer | 0.0091517 | 0.8627299 | 0.00915 | ** | NA |
| XKR9 | uterine cancer | 0.0091517 | 0.8627299 | 0.00915 | ** | NA |
| KIAA0922 | lung cancer | 0.0091532 | 0.8627299 | 0.00915 | ** | NA |
| TTF1 | lung cancer | 0.0092125 | 0.8627299 | 0.00921 | ** | NA |
| SYNE1 | lung cancer | 0.0092324 | 0.8627299 | 0.00923 | ** | NA |
| TBC1D10B | uterine cancer | 0.0092568 | 0.8627299 | 0.00926 | ** | NA |
| DNAJC8 | uterine cancer | 0.0092717 | 0.8627299 | 0.00927 | ** | NA |
| PKDCC | uterine cancer | 0.0092717 | 0.8627299 | 0.00927 | ** | NA |
| TCF3 | uterine cancer | 0.0092717 | 0.8627299 | 0.00927 | ** | NA |
| ZNF207 | uterine cancer | 0.0093098 | 0.8627299 | 0.00931 | ** | NA |
| RASIP1 | colorectal cancer | 0.0093199 | 0.8627299 | 0.00932 | ** | NA |
| AP5B1 | uterine cancer | 0.0093215 | 0.8627299 | 0.00932 | ** | NA |
| RAD50 | leukemia | 0.0094090 | 0.8627299 | 0.00941 | ** | NA |
| DNAH5 | colorectal cancer | 0.0095928 | 0.8627299 | 0.00959 | ** | NA |
| TJP3 | head and neck cancer | 0.0096224 | 0.8627299 | 0.00962 | ** | NA |
| NRP2 | colorectal cancer | 0.0096226 | 0.8627299 | 0.00962 | ** | NA |
| INTS4 | uterine cancer | 0.0096407 | 0.8627299 | 0.00964 | ** | NA |
| TCHH | liver cancer | 0.0096613 | 0.8627299 | 0.00966 | ** | NA |
| TTF1 | leukemia | 0.0097690 | 0.8627299 | 0.00977 | ** | NA |
| ZNF343 | lung cancer | 0.0098486 | 0.8627299 | 0.00985 | ** | NA |
| LILRB2 | leukemia | 0.0098632 | 0.8627299 | 0.00986 | ** | NA |
| ZNF469 | uterine cancer | 0.0099581 | 0.8627299 | 0.00996 | ** | NA |
| IPO4 | uterine cancer | 0.0099688 | 0.8627299 | 0.00997 | ** | NA |
# Overlaid histograms of the unadjusted (`p`) and BH-adjusted (`p.adj`)
# Wilcoxon p-values from the deleterious/missense, per-lineage comparison.
# The constant strings inside aes() are legend keys, not colours; the actual
# colours and the legend labels are assigned in scale_fill_manual().
wilcox_lineage_allmut_delmis_plot <- ggplot(data = crispr_signif_allmut_delmis_lineage) +
  geom_histogram(aes(x = p, fill = "chartreuse4"), breaks = seq(0, 1, by = 0.025), color = "black", alpha = 0.7) +
  geom_histogram(aes(x = p.adj, fill = "darkslategray3"), breaks = seq(0, 1, by = 0.025), color = "black", alpha = 0.7) +
  scale_x_continuous(breaks = seq(0, 1, by = 0.05), labels = seq(0, 1, by = 0.05)) +
  # `breaks` pins the key order so each label is guaranteed to sit next to the
  # intended colour instead of relying on alphabetical ordering of the keys.
  scale_fill_manual(
    name = "P-values",
    values = c("chartreuse4" = "chartreuse4", "darkslategray3" = "darkslategray3"),
    breaks = c("chartreuse4", "darkslategray3"),
    labels = c("Unadjusted", "BH-adjusted")
  ) +
  theme(axis.text.x = element_text(angle = 45, hjust = 1), legend.position = c(0.1, 0.85)) +
  # The axis carries both unadjusted and adjusted values, so it is labelled
  # generically; the legend distinguishes the two series.
  labs(x = "P-value", y = "Frequency")
wilcox_lineage_allmut_delmis_plot
Mutation status boxplots for genes identified in G2P.
# Gene panel for the mutation-status plots; the vector order also fixes the
# left-to-right order of genes on the x axis.
genes_lvlA <- c("EGFR", "KRAS", "BRAF", "NRAS", "ERBB2", "MET", "ALK", "AKT1", "PDGFRA", "RET")
crispr_ptmuts_lvlA <- filter(crispr_data_ptmuts, Hugo_Symbol %in% genes_lvlA)
crispr_ptmuts_lvlA$Hugo_Symbol <- ordered(crispr_ptmuts_lvlA$Hugo_Symbol, levels = genes_lvlA)
# Fill palette keyed by mutation status. The colour column has one entry per
# data row, so keep only the first entry for each status to obtain a compact
# named vector (one colour per status) for scale_fill_manual().
crispr_ptmuts_lvlA_color <- as.character(crispr_ptmuts_lvlA$Color_DeleteriousMissense)
names(crispr_ptmuts_lvlA_color) <- crispr_ptmuts_lvlA$Mutation_Status_DeleteriousMissense
crispr_ptmuts_lvlA_color <- crispr_ptmuts_lvlA_color[!duplicated(names(crispr_ptmuts_lvlA_color))]
# Violin + narrow boxplot of Score per gene, dodged by mutation status.
crispr_ptmuts_lvlA_plot <- ggplot(data = crispr_ptmuts_lvlA, aes(x = Hugo_Symbol, y = Score)) +
  geom_hline(yintercept = 0, lty = 2, color = "darkgray") +
  geom_violin(mapping = aes(fill = Mutation_Status_DeleteriousMissense), alpha = 0.7, position = position_dodge(0.85)) +
  # The colour aesthetic is mapped only so the boxplots dodge alongside the
  # violins; both groups are drawn in black and the colour legend is hidden.
  geom_boxplot(mapping = aes(color = Mutation_Status_DeleteriousMissense), position = position_dodge(0.85), width = 0.1) +
  scale_fill_manual(values = crispr_ptmuts_lvlA_color) +
  scale_color_manual(values = c("black", "black")) +
  # "none" replaces the deprecated `guides(color = FALSE)` form.
  guides(color = "none") +
  theme(legend.direction = "horizontal", legend.justification = c(1, 0), legend.position = c(1, 0), legend.box.margin = margin(c(10, 10, 10, 10))) +
  labs(fill = "Mutation Status", y = "CERES Score", x = "Gene")
crispr_ptmuts_lvlA_plot
# ggsave("./plots_18Q3/manuscript/crispr_ptmuts_lvlA_plot.png", crispr_ptmuts_lvlA_plot, device = "png", width = 12, height = 5, units = "in")
# Faceted variant of the mutation-status plot: one panel per gene laid out in
# a single row, with mutation status on the x axis inside each panel.
crispr_ptmuts_lvlA_plot_faceted <- ggplot(data = crispr_ptmuts_lvlA, aes(x = Mutation_Status_DeleteriousMissense, y = Score)) +
  # ncol equals the number of genes present, which forces a single row.
  facet_wrap(~ Hugo_Symbol, ncol = length(unique(crispr_ptmuts_lvlA$Hugo_Symbol))) +
  geom_hline(yintercept = 0, lty = 2, color = "darkgray") +
  geom_violin(mapping = aes(fill = Mutation_Status_DeleteriousMissense), alpha = 0.7, position = position_dodge(0.85)) +
  geom_boxplot(mapping = aes(color = Mutation_Status_DeleteriousMissense), position = position_dodge(0.85), width = 0.1) +
  scale_fill_manual(values = crispr_ptmuts_lvlA_color) +
  scale_color_manual(values = c("black", "black")) +
  # "none" replaces the deprecated `guides(color = FALSE)` form.
  guides(color = "none") +
  # X-axis ticks, text and title are blanked; the fill legend identifies the
  # mutation status instead.
  theme(legend.direction = "horizontal", legend.justification = c(0.5, 0), legend.position = c(0.5, 0), legend.box.margin = margin(c(10, 10, 10, 10)), axis.ticks.x = element_blank(), axis.text.x = element_blank(), axis.title.x = element_blank()) +
  labs(fill = "Mutation Status", y = "CERES Score", x = "Mutation Status")
crispr_ptmuts_lvlA_plot_faceted
# ggsave("./plots_18Q3/manuscript/crispr_ptmuts_lvlA_plot_faceted.png", crispr_ptmuts_lvlA_plot_faceted, device = "png", width = 12, height = 5, units = "in")
Match specific point mutations.
# Evidence-level "A" associations from the tab-separated associations file.
g2p_indications <- read.delim(
  "./data_munging/data_mutation_associations_appended.csv",
  sep = "\t",
  row.names = 1,
  header = TRUE
) %>%
  filter(Evidence.Level == "A")
# MAF rows whose genome change matches one of those associations.
maf_g2p_indications <- maf_raw %>%
  filter(Genome_Change %in% g2p_indications$MutationName)
# Attach the CRISPR scores for the same gene / cell line.
crispr_g2p_indications <- merge(
  maf_g2p_indications,
  crispr_data_ptmuts,
  by = c("Hugo_Symbol", "CCLE_Name", "Broad_ID")
)
# Gene / cell-line combinations that occur more than once after the merge.
dup_g2p_indications <- crispr_g2p_indications[, c("Hugo_Symbol", "CCLE_Name", "Broad_ID")] %>%
  group_by(Hugo_Symbol, CCLE_Name, Broad_ID) %>%
  tally() %>%
  filter(n > 1)
# Left-join the duplicate counts and keep only rows without one, i.e. every
# occurrence of a duplicated gene / cell-line pair is dropped.
# NOTE(review): the join keys omit CCLE_Name although both tables carry it, so
# merge() emits CCLE_Name.x / CCLE_Name.y -- confirm nothing downstream expects
# a plain CCLE_Name column, and that dropping (rather than de-duplicating) the
# repeated pairs is intended.
crispr_g2p_indications <- merge(
  crispr_g2p_indications,
  dup_g2p_indications,
  by = c("Hugo_Symbol", "Broad_ID"),
  all.x = TRUE
) %>%
  filter(is.na(n))
# Per-gene score distribution: faint boxplot with lineage-coloured points.
crispr_g2p_indications_plot <- ggplot(crispr_g2p_indications, aes(x = Hugo_Symbol, y = Score)) +
  labs(x = "Gene", y = "CERES Score") +
  geom_hline(yintercept = 0, lty = 2, color = "darkgray") +
  geom_boxplot(alpha = 0.5, color = "lightgray", outlier.shape = NA) +
  geom_jitter(width = 0.3, mapping = aes(color = group_general_lineage_name), size = 1) +
  theme_light() +
  theme(legend.title = element_blank())
crispr_g2p_indications_plot
# ggsave("./plots_18Q3/manuscript/crispr_g2p_indications_plot_test.png", crispr_g2p_indications_plot, width = 12, height = 5, units = "in")
The mutation status portion of these heatmaps was made using deleterious and missense mutations as “mutant” and all others as “other.”
Make and save heatmap data matrices:
# Set up heatmap data
# Transpose `crispr`; its first column (presumably Broad_ID -- TODO confirm)
# becomes the first row, which is promoted to column names and then removed.
chm_scores <- t(crispr)
colnames(chm_scores) <- chm_scores[1, ]
chm_scores <- chm_scores[-1, ]
# Remember the labels: the numeric conversion below discards row names.
chm_rows <- rownames(chm_scores)
chm_cols <- colnames(chm_scores)
# Convert each column to double and centre/scale it in a single pass.
# NOTE(review): the divisor is var(), not sd(), so this is not a conventional
# z-score; mean()/var() are also called without na.rm, unlike the other
# heatmap matrices -- confirm both are intended.
chm_scores <- apply(chm_scores, 2, function(column) {
  values <- as.double(column)
  (values - mean(values)) / var(values)
})
dimnames(chm_scores) <- list(chm_rows, chm_cols)
saveRDS(chm_scores, file = "./data_munging/rds/crispr_heatmap_scores.rds")
# Make full grid for genomic features
# One row per (cell line, gene) pair seen in the CRISPR data, so every feature
# matrix built from it has the same rows and columns (missing values -> NA).
crispr_grid_ccls <- unique(crispr_data$Broad_ID)
crispr_grid_genes <- unique(crispr_data$Hugo_Symbol)
crispr_grid <- expand.grid("Broad_ID" = crispr_grid_ccls, "Hugo_Symbol" = crispr_grid_genes)
# Gene expression
# Restrict expression to screened genes (reusing the gene set computed above)
# and log2-transform with a small offset so that zero RPKM stays finite.
ge_filt <- filter(ge_melt, Hugo_Symbol %in% crispr_grid_genes)[, c("Hugo_Symbol", "Broad_ID", "RPKM")]
ge_filt$RPKM <- log2(ge_filt$RPKM + 0.0001)
chm_ge_grid <- merge(crispr_grid, ge_filt, by = c("Broad_ID", "Hugo_Symbol"), all.x = TRUE)
# Long -> wide: one row per gene, one "RPKM.<Broad_ID>" column per cell line.
chm_ge <- reshape(chm_ge_grid, idvar = "Hugo_Symbol", timevar = "Broad_ID", direction = "wide")
chm_ge_rows <- chm_ge$Hugo_Symbol
chm_ge$Hugo_Symbol <- NULL
# Strip only the literal leading "RPKM." prefix added by reshape(); the
# previous unanchored pattern "RPKM." treated "." as a wildcard and could match
# anywhere in a column name.
chm_ge_cols <- sub("^RPKM\\.", "", colnames(chm_ge))
chm_ge <- apply(chm_ge, 2, as.double)
# Centre and scale each column, ignoring missing values.
# NOTE(review): the divisor is var(), not sd(), so this is not a conventional
# z-score -- confirm this is intended.
chm_ge <- apply(chm_ge, 2, function(x) (x - mean(x, na.rm = TRUE)) / var(x, na.rm = TRUE))
rownames(chm_ge) <- chm_ge_rows
colnames(chm_ge) <- chm_ge_cols
saveRDS(chm_ge, "./data_munging/rds/crispr_heatmap_ge.rds")
# Mutation status
# Left-join mutation calls onto the full (cell line, gene) grid; pairs with no
# record come back as NA.
chm_mut_grid <- merge(crispr_grid, maf_df[, c("Broad_ID", "Hugo_Symbol", "Mutation_Status")], by = c("Broad_ID", "Hugo_Symbol"), all.x = TRUE)
# Coerce to character first: if Mutation_Status is a factor, ifelse() would
# return its integer codes, the comparison with "Other" would never match, and
# every non-missing entry would be coded as mutant.
chm_mut_grid$Mutation_Status <- as.character(chm_mut_grid$Mutation_Status)
# Binary indicator: 0 = no record or "Other", 1 = any other status.
chm_mut_grid$Mutation_Status <- ifelse(
  is.na(chm_mut_grid$Mutation_Status) | chm_mut_grid$Mutation_Status == "Other",
  0,
  1
)
# Long -> wide: one row per gene, one "Mutation_Status.<Broad_ID>" column per
# cell line.
chm_mut <- reshape(chm_mut_grid, idvar = "Hugo_Symbol", timevar = "Broad_ID", direction = "wide")
chm_mut_rows <- chm_mut$Hugo_Symbol
chm_mut$Hugo_Symbol <- NULL
# Strip only the literal leading prefix added by reshape(); the previous
# unanchored pattern treated "." as a wildcard.
chm_mut_cols <- sub("^Mutation_Status\\.", "", colnames(chm_mut))
chm_mut <- matrix(as.numeric(unlist(chm_mut)), nrow = nrow(chm_mut))
rownames(chm_mut) <- chm_mut_rows
colnames(chm_mut) <- chm_mut_cols
saveRDS(chm_mut, "./data_munging/rds/crispr_heatmap_mut.rds")
# Copy number
cn_filt <- filter(cn_melt, Hugo_Symbol %in% unique(crispr_data$Hugo_Symbol))
# Merge the filtered table (it was previously computed but never used) and only
# the columns the wide reshape needs, mirroring the gene expression matrix.
chm_cn_grid <- merge(crispr_grid, cn_filt[, c("Hugo_Symbol", "Broad_ID", "Copy_Number")], by = c("Broad_ID", "Hugo_Symbol"), all.x = TRUE)
chm_cn <- reshape(chm_cn_grid, idvar = "Hugo_Symbol", timevar = "Broad_ID", direction = "wide")
chm_cn_rows <- chm_cn$Hugo_Symbol
chm_cn$Hugo_Symbol <- NULL
# Strip the literal "Copy_Number." prefix added by reshape().
chm_cn_cols <- sub("^Copy_Number\\.", "", colnames(chm_cn))
chm_cn <- apply(chm_cn, 2, as.double)
# z-score per column: divide by the standard deviation, not the variance.
chm_cn <- apply(chm_cn, 2, function(x) (x - mean(x, na.rm = TRUE)) / sd(x, na.rm = TRUE))
rownames(chm_cn) <- chm_cn_rows
colnames(chm_cn) <- chm_cn_cols
saveRDS(chm_cn, "./data_munging/rds/crispr_heatmap_cn.rds")
Load heatmap matrices:
# Reload the four cached heatmap matrices (scores, gene expression, mutation
# status, copy number) from ./data_munging/rds.
chm_scores <- readRDS("./data_munging/rds/crispr_heatmap_scores.rds")
chm_ge <- readRDS("./data_munging/rds/crispr_heatmap_ge.rds")
chm_mut <- readRDS("./data_munging/rds/crispr_heatmap_mut.rds")
chm_cn <- readRDS("./data_munging/rds/crispr_heatmap_cn.rds")
# Metadata annotation dataframe: one row per cell line, one column per annotation track.
chm_annot <- data.frame("Broad_ID" = crispr$Broad_ID) %>%
  merge(crispr_meta[, c("Broad_ID", "cell_line_SSMD", "cas9_activity", "culture_type", "primary_tissue")], by = "Broad_ID", all.x = TRUE) %>%
  merge(ccl_info[, c("Broad_ID", "Primary.Disease", "Gender", "Source")], by = "Broad_ID", all.x = TRUE)
rownames(chm_annot) <- chm_annot$Broad_ID
chm_annot$Broad_ID <- NULL
colnames(chm_annot) <- c("Cell_Line_SSMD", "Cas9_Activity", "Culture_Type", "Primary_Tissue", "Primary_Disease", "Gender", "Source")
chm_annot$Cell_Line_SSMD <- as.numeric(chm_annot$Cell_Line_SSMD)
chm_annot$Cas9_Activity <- as.numeric(chm_annot$Cas9_Activity)
# merge() re-sorts rows by Broad_ID, while HeatmapAnnotation() pairs annotation
# rows with heatmap columns by position. Put the rows in the heatmap's column
# order so every track lines up with its own cell line.
if (all(colnames(chm_scores) %in% rownames(chm_annot))) {
  chm_annot <- chm_annot[colnames(chm_scores), , drop = FALSE]
} else {
  warning("Heatmap columns are missing from the annotation table; annotation tracks may be misaligned", call. = FALSE)
}
# Column annotation tracks with per-track legend settings.
# Each legend list now carries `title` exactly once: the previous lists repeated
# the key (once spelled "Primary_Tissue"), which is ambiguous and can be rejected
# when the list is passed on as function arguments.
chm_legend_fonts <- list(title_gp = gpar(fontsize = 20), labels_gp = gpar(fontsize = 15))
# Legend settings for the numeric tracks.
chm_continuous_legend <- function(title) {
  c(list(title = title), chm_legend_fonts, list(title_position = "topcenter", legend_height = unit(2, "in")))
}
# Legend settings for the categorical tracks (entries wrapped into 8 rows).
chm_discrete_legend <- function(title) {
  c(list(title = title), chm_legend_fonts, list(nrow = 8, title_position = "topleft", grid_height = unit(0.3, "in")))
}
chm_annot_list <- HeatmapAnnotation(df = chm_annot, annotation_legend_param = list(
  Cell_Line_SSMD = chm_continuous_legend("Cell Line SSMD"),
  Cas9_Activity = chm_continuous_legend("Cas9 Activity"),
  Culture_Type = chm_discrete_legend("Culture Type"),
  Primary_Tissue = chm_discrete_legend("Primary Tissue"),
  Primary_Disease = chm_discrete_legend("Primary Disease"),
  Gender = c(chm_legend_fonts, list(grid_height = unit(0.3, "in"))),
  Source = chm_discrete_legend("Source")))
# Clustered heatmap of the standardized scores for all genes, with the cell
# line annotation tracks drawn underneath.
chm_score_breaks <- c(min(chm_scores), 0, max(chm_scores))
chm_all_plot <- Heatmap(
  chm_scores,
  name = "CERES Score",
  row_title = "Genes",
  column_title = "Cancer Cell Lines",
  # Diverging palette with white pinned at zero
  col = colorRamp2(chm_score_breaks, c("purple4", "white", "seagreen4")),
  na_col = "black",
  heatmap_legend_param = list(title_gp = gpar(fontsize = 20), labels_gp = gpar(fontsize = 15), title_position = "topcenter", legend_height = unit(2, "in")),
  bottom_annotation = chm_annot_list,
  bottom_annotation_height = unit(5, "in"),
  row_title_gp = gpar(fontsize = 30, fontface = "bold"),
  column_title_gp = gpar(fontsize = 30, fontface = "bold"),
  row_names_gp = gpar(fontsize = 5),
  column_names_gp = gpar(fontsize = 5),
  # Complete-linkage clustering on Euclidean distance for both dimensions
  clustering_distance_rows = "euclidean",
  clustering_distance_columns = "euclidean",
  clustering_method_rows = "complete",
  clustering_method_columns = "complete",
  row_dend_reorder = TRUE,
  column_dend_reorder = TRUE,
  row_dend_width = unit(4, "in"),
  column_dend_height = unit(2, "in"),
  width = 3
)
# Render the full heatmap to PDF and PNG and write each annotation track's name
# just to the right of the track.
chm_track_labels <- c(
  Cell_Line_SSMD = "Cell Line SSMD",
  Cas9_Activity = "Cas9 Activity",
  Culture_Type = "Culture Type",
  Primary_Tissue = "Primary Tissue",
  Primary_Disease = "Primary Disease",
  Gender = "Gender",
  Source = "Source"
)
chm_open_devices <- list(
  function() pdf("./plots_18Q3/crispr_heatmap_euclidean_all.pdf", width = 50, height = 50, paper = "special"),
  function() png("./plots_18Q3/crispr_heatmap_euclidean_all.png", width = 50, height = 50, units = "in", res = 96)
)
for (chm_open_device in chm_open_devices) {
  chm_open_device()
  # `finally` closes the device even if drawing fails, so a failed run does not
  # leave a graphics device open for later plots to land in.
  tryCatch({
    draw(chm_all_plot, heatmap_legend_side = "bottom", annotation_legend_side = "bottom")
    for (chm_track in names(chm_track_labels)) {
      seekViewport(paste0("annotation_", chm_track))
      grid.text(chm_track_labels[[chm_track]], unit(1, "npc") + unit(1, "mm"), 0.5, gp = gpar(fontsize = 15), default.units = "npc", just = "left")
    }
  }, finally = dev.off())
}
Code for heatmap of CGC-filtered genes only
# Restrict each heatmap matrix to the rows whose gene symbol is listed in `cgc`.
chm_scores_cgc <- chm_scores[rownames(chm_scores) %in% cgc$Hugo_Symbol, , drop = FALSE]
chm_ge_cgc <- chm_ge[rownames(chm_ge) %in% cgc$Hugo_Symbol, , drop = FALSE]
chm_mut_cgc <- chm_mut[rownames(chm_mut) %in% cgc$Hugo_Symbol, , drop = FALSE]
chm_cn_cgc <- chm_cn[rownames(chm_cn) %in% cgc$Hugo_Symbol, , drop = FALSE]
# Column annotation tracks with per-track legend settings.
# Each legend list now carries `title` exactly once: the previous lists repeated
# the key (once spelled "Primary_Tissue"), which is ambiguous and can be rejected
# when the list is passed on as function arguments.
chm_legend_fonts <- list(title_gp = gpar(fontsize = 20), labels_gp = gpar(fontsize = 15))
# Legend settings for the numeric tracks.
chm_continuous_legend <- function(title) {
  c(list(title = title), chm_legend_fonts, list(title_position = "topcenter", legend_height = unit(2, "in")))
}
# Legend settings for the categorical tracks (entries wrapped into 8 rows).
chm_discrete_legend <- function(title) {
  c(list(title = title), chm_legend_fonts, list(nrow = 8, title_position = "topleft", grid_height = unit(0.3, "in")))
}
chm_annot_list <- HeatmapAnnotation(df = chm_annot, annotation_legend_param = list(
  Cell_Line_SSMD = chm_continuous_legend("Cell Line SSMD"),
  Cas9_Activity = chm_continuous_legend("Cas9 Activity"),
  Culture_Type = chm_discrete_legend("Culture Type"),
  Primary_Tissue = chm_discrete_legend("Primary Tissue"),
  Primary_Disease = chm_discrete_legend("Primary Disease"),
  Gender = c(chm_legend_fonts, list(grid_height = unit(0.3, "in"))),
  Source = chm_discrete_legend("Source")))
# Clustered score heatmap for the CGC gene subset (Euclidean distance, complete
# linkage). Its row/column ordering is reused by the side panels.
chm_score_cgc_breaks <- c(min(chm_scores_cgc), 0, max(chm_scores_cgc))
chm_plot_cgc <- Heatmap(
  chm_scores_cgc,
  name = "CERES Score",
  row_title = "Genes",
  column_title = "Cancer Cell Lines",
  col = colorRamp2(chm_score_cgc_breaks, c("purple4", "white", "seagreen4")),
  na_col = "black",
  heatmap_legend_param = list(title_gp = gpar(fontsize = 20), labels_gp = gpar(fontsize = 15), title_position = "topcenter", legend_height = unit(2, "in")),
  bottom_annotation = chm_annot_list,
  bottom_annotation_height = unit(5, "in"),
  row_title_gp = gpar(fontsize = 30, fontface = "bold"),
  column_title_gp = gpar(fontsize = 30, fontface = "bold"),
  row_names_gp = gpar(fontsize = 5),
  column_names_gp = gpar(fontsize = 5),
  clustering_distance_rows = "euclidean",
  clustering_distance_columns = "euclidean",
  clustering_method_rows = "complete",
  clustering_method_columns = "complete",
  row_dend_reorder = TRUE,
  column_dend_reorder = TRUE,
  row_dend_width = unit(4, "in"),
  column_dend_height = unit(2, "in"),
  width = 3
)
# Names of the genes and cell lines in clustered order
chm_plot_cgc_rows <- rownames(chm_scores_cgc)[unlist(row_order(chm_plot_cgc))]
chm_plot_cgc_cols <- colnames(chm_scores_cgc)[unlist(column_order(chm_plot_cgc))]
# Side panels for the CGC heatmap. Clustering is switched off; rows and columns
# follow the ordering taken from the clustered score heatmap.

# Gene expression panel
chm_ge_cgc_range <- range(chm_ge_cgc, na.rm = TRUE)
chm_ge_plot_cgc <- Heatmap(
  chm_ge_cgc,
  name = "Gene Expression",
  column_title = "Gene Expression",
  column_title_gp = gpar(fontsize = 30, fontface = "bold"),
  col = colorRamp2(c(chm_ge_cgc_range[1], 0, chm_ge_cgc_range[2]), c("dodgerblue4", "white", "firebrick4")),
  na_col = "black",
  heatmap_legend_param = list(title_gp = gpar(fontsize = 20), labels_gp = gpar(fontsize = 15), title_position = "topcenter", legend_height = unit(2, "in")),
  cluster_rows = FALSE,
  cluster_columns = FALSE,
  row_order = chm_plot_cgc_rows,
  column_order = chm_plot_cgc_cols,
  row_names_gp = gpar(fontsize = 5),
  show_column_names = FALSE,
  width = 1
)

# Copy number panel
chm_cn_cgc_range <- range(chm_cn_cgc, na.rm = TRUE)
chm_cn_plot_cgc <- Heatmap(
  chm_cn_cgc,
  name = "Copy Number",
  column_title = "Copy Number",
  column_title_gp = gpar(fontsize = 30, fontface = "bold"),
  col = colorRamp2(c(chm_cn_cgc_range[1], 0, chm_cn_cgc_range[2]), c("white", "thistle2", "mediumorchid4")),
  na_col = "black",
  heatmap_legend_param = list(title_gp = gpar(fontsize = 20), labels_gp = gpar(fontsize = 15), title_position = "topcenter", legend_height = unit(2, "in")),
  cluster_rows = FALSE,
  cluster_columns = FALSE,
  row_order = chm_plot_cgc_rows,
  column_order = chm_plot_cgc_cols,
  row_names_gp = gpar(fontsize = 5),
  show_column_names = FALSE,
  width = 1
)

# Mutation status panel (matrix values are 0/1, labelled "Other"/"Mutant" in the legend)
chm_mut_plot_cgc <- Heatmap(
  chm_mut_cgc,
  name = "Mutation Status",
  column_title = "Mutation Status",
  column_title_gp = gpar(fontsize = 30, fontface = "bold"),
  col = c("cyan3", "darkorchid"),
  na_col = "black",
  heatmap_legend_param = list(title_gp = gpar(fontsize = 20), labels_gp = gpar(fontsize = 15), at = c(0, 1), labels = c("Other", "Mutant"), title_position = "topcenter", grid_height = unit(0.3, "in")),
  cluster_rows = FALSE,
  cluster_columns = FALSE,
  row_order = chm_plot_cgc_rows,
  column_order = chm_plot_cgc_cols,
  row_names_gp = gpar(fontsize = 5),
  show_column_names = FALSE,
  width = 1
)

# Concatenate the four heatmaps left to right
chm_all_plot_cgc <- chm_plot_cgc + chm_ge_plot_cgc + chm_cn_plot_cgc + chm_mut_plot_cgc
# Render the combined CGC heatmap (Euclidean clustering) to PDF and PNG and
# write each annotation track's name just to the right of the track.
chm_track_labels <- c(
  Cell_Line_SSMD = "Cell Line SSMD",
  Cas9_Activity = "Cas9 Activity",
  Culture_Type = "Culture Type",
  Primary_Tissue = "Primary Tissue",
  Primary_Disease = "Primary Disease",
  Gender = "Gender",
  Source = "Source"
)
chm_open_devices <- list(
  function() pdf("./plots_18Q3/crispr_heatmap_euclidean_all_cgc.pdf", width = 50, height = 50, paper = "special"),
  function() png("./plots_18Q3/crispr_heatmap_euclidean_all_cgc.png", width = 50, height = 50, units = "in", res = 96)
)
for (chm_open_device in chm_open_devices) {
  chm_open_device()
  # `finally` closes the device even if drawing fails, so a failed run does not
  # leave a graphics device open for later plots to land in.
  tryCatch({
    draw(chm_all_plot_cgc, heatmap_legend_side = "bottom", annotation_legend_side = "bottom")
    for (chm_track in names(chm_track_labels)) {
      seekViewport(paste0("annotation_", chm_track))
      grid.text(chm_track_labels[[chm_track]], unit(1, "npc") + unit(1, "mm"), 0.5, gp = gpar(fontsize = 15), default.units = "npc", just = "left")
    }
  }, finally = dev.off())
}
# Restrict each heatmap matrix to the rows whose gene symbol is listed in `cgc`.
chm_scores_cgc <- chm_scores[rownames(chm_scores) %in% cgc$Hugo_Symbol, , drop = FALSE]
chm_ge_cgc <- chm_ge[rownames(chm_ge) %in% cgc$Hugo_Symbol, , drop = FALSE]
chm_mut_cgc <- chm_mut[rownames(chm_mut) %in% cgc$Hugo_Symbol, , drop = FALSE]
chm_cn_cgc <- chm_cn[rownames(chm_cn) %in% cgc$Hugo_Symbol, , drop = FALSE]
# Column annotation tracks with per-track legend settings.
# Each legend list now carries `title` exactly once: the previous lists repeated
# the key (once spelled "Primary_Tissue"), which is ambiguous and can be rejected
# when the list is passed on as function arguments.
chm_legend_fonts <- list(title_gp = gpar(fontsize = 20), labels_gp = gpar(fontsize = 15))
# Legend settings for the numeric tracks.
chm_continuous_legend <- function(title) {
  c(list(title = title), chm_legend_fonts, list(title_position = "topcenter", legend_height = unit(2, "in")))
}
# Legend settings for the categorical tracks (entries wrapped into 8 rows).
chm_discrete_legend <- function(title) {
  c(list(title = title), chm_legend_fonts, list(nrow = 8, title_position = "topleft", grid_height = unit(0.3, "in")))
}
chm_annot_list <- HeatmapAnnotation(df = chm_annot, annotation_legend_param = list(
  Cell_Line_SSMD = chm_continuous_legend("Cell Line SSMD"),
  Cas9_Activity = chm_continuous_legend("Cas9 Activity"),
  Culture_Type = chm_discrete_legend("Culture Type"),
  Primary_Tissue = chm_discrete_legend("Primary Tissue"),
  Primary_Disease = chm_discrete_legend("Primary Disease"),
  Gender = c(chm_legend_fonts, list(grid_height = unit(0.3, "in"))),
  Source = chm_discrete_legend("Source")))
# Clustered score heatmap for the CGC gene subset, this time with Pearson
# distance (complete linkage). Its row/column ordering is reused by the side panels.
chm_score_cgc_breaks <- c(min(chm_scores_cgc), 0, max(chm_scores_cgc))
chm_plot_cgc <- Heatmap(
  chm_scores_cgc,
  name = "CERES Score",
  row_title = "Genes",
  column_title = "Cancer Cell Lines",
  col = colorRamp2(chm_score_cgc_breaks, c("purple4", "white", "seagreen4")),
  na_col = "black",
  heatmap_legend_param = list(title_gp = gpar(fontsize = 20), labels_gp = gpar(fontsize = 15), title_position = "topcenter", legend_height = unit(2, "in")),
  bottom_annotation = chm_annot_list,
  bottom_annotation_height = unit(5, "in"),
  row_title_gp = gpar(fontsize = 30, fontface = "bold"),
  column_title_gp = gpar(fontsize = 30, fontface = "bold"),
  row_names_gp = gpar(fontsize = 5),
  column_names_gp = gpar(fontsize = 5),
  clustering_distance_rows = "pearson",
  clustering_distance_columns = "pearson",
  clustering_method_rows = "complete",
  clustering_method_columns = "complete",
  row_dend_reorder = TRUE,
  column_dend_reorder = TRUE,
  row_dend_width = unit(4, "in"),
  column_dend_height = unit(2, "in"),
  width = 3
)
# Names of the genes and cell lines in clustered order
chm_plot_cgc_rows <- rownames(chm_scores_cgc)[unlist(row_order(chm_plot_cgc))]
chm_plot_cgc_cols <- colnames(chm_scores_cgc)[unlist(column_order(chm_plot_cgc))]
# Side panels for the Pearson-clustered CGC heatmap. Clustering is switched
# off; rows and columns follow the ordering taken from the score heatmap.

# Gene expression panel
chm_ge_cgc_range <- range(chm_ge_cgc, na.rm = TRUE)
chm_ge_plot_cgc <- Heatmap(
  chm_ge_cgc,
  name = "Gene Expression",
  column_title = "Gene Expression",
  column_title_gp = gpar(fontsize = 30, fontface = "bold"),
  col = colorRamp2(c(chm_ge_cgc_range[1], 0, chm_ge_cgc_range[2]), c("dodgerblue4", "white", "firebrick4")),
  na_col = "black",
  heatmap_legend_param = list(title_gp = gpar(fontsize = 20), labels_gp = gpar(fontsize = 15), title_position = "topcenter", legend_height = unit(2, "in")),
  cluster_rows = FALSE,
  cluster_columns = FALSE,
  row_order = chm_plot_cgc_rows,
  column_order = chm_plot_cgc_cols,
  row_names_gp = gpar(fontsize = 5),
  show_column_names = FALSE,
  width = 1
)

# Copy number panel
chm_cn_cgc_range <- range(chm_cn_cgc, na.rm = TRUE)
chm_cn_plot_cgc <- Heatmap(
  chm_cn_cgc,
  name = "Copy Number",
  column_title = "Copy Number",
  column_title_gp = gpar(fontsize = 30, fontface = "bold"),
  col = colorRamp2(c(chm_cn_cgc_range[1], 0, chm_cn_cgc_range[2]), c("white", "thistle2", "mediumorchid4")),
  na_col = "black",
  heatmap_legend_param = list(title_gp = gpar(fontsize = 20), labels_gp = gpar(fontsize = 15), title_position = "topcenter", legend_height = unit(2, "in")),
  cluster_rows = FALSE,
  cluster_columns = FALSE,
  row_order = chm_plot_cgc_rows,
  column_order = chm_plot_cgc_cols,
  row_names_gp = gpar(fontsize = 5),
  show_column_names = FALSE,
  width = 1
)

# Mutation status panel (matrix values are 0/1, labelled "Other"/"Mutant" in the legend)
chm_mut_plot_cgc <- Heatmap(
  chm_mut_cgc,
  name = "Mutation Status",
  column_title = "Mutation Status",
  column_title_gp = gpar(fontsize = 30, fontface = "bold"),
  col = c("cyan3", "darkorchid"),
  na_col = "black",
  heatmap_legend_param = list(title_gp = gpar(fontsize = 20), labels_gp = gpar(fontsize = 15), at = c(0, 1), labels = c("Other", "Mutant"), title_position = "topcenter", grid_height = unit(0.3, "in")),
  cluster_rows = FALSE,
  cluster_columns = FALSE,
  row_order = chm_plot_cgc_rows,
  column_order = chm_plot_cgc_cols,
  row_names_gp = gpar(fontsize = 5),
  show_column_names = FALSE,
  width = 1
)

# Concatenate the four heatmaps left to right
chm_all_plot_cgc <- chm_plot_cgc + chm_ge_plot_cgc + chm_cn_plot_cgc + chm_mut_plot_cgc
# Render the combined CGC heatmap (Pearson clustering) to PDF and write each
# annotation track's name just to the right of the track.
chm_track_labels <- c(
  Cell_Line_SSMD = "Cell Line SSMD",
  Cas9_Activity = "Cas9 Activity",
  Culture_Type = "Culture Type",
  Primary_Tissue = "Primary Tissue",
  Primary_Disease = "Primary Disease",
  Gender = "Gender",
  Source = "Source"
)
pdf("./plots_18Q3/crispr_heatmap_pearson_all_cgc.pdf", width = 50, height = 50, paper = "special")
# `finally` closes the device even if drawing fails, so a failed run does not
# leave a graphics device open for later plots to land in.
tryCatch({
  draw(chm_all_plot_cgc, heatmap_legend_side = "bottom", annotation_legend_side = "bottom")
  for (chm_track in names(chm_track_labels)) {
    seekViewport(paste0("annotation_", chm_track))
    grid.text(chm_track_labels[[chm_track]], unit(1, "npc") + unit(1, "mm"), 0.5, gp = gpar(fontsize = 15), default.units = "npc", just = "left")
  }
}, finally = dev.off())
Plot code:
# Build one grouped plot per CGC gene with makeCRISPRgrob() and save each as a PNG.
crispr_data_cgc <- crispr_data %>%
  filter(Hugo_Symbol %in% cgc$Hugo_Symbol)
crispr_all_genes <- unique(crispr_data_cgc$Hugo_Symbol)
crispr_all_groups <- lapply(crispr_all_genes, makeCRISPRgrob)
crispr_all_groups_paths <- paste0(crispr_all_genes, "_crispr_grouped.png")
# File name and plot are passed to ggsave() as its first two arguments
walk2(
  crispr_all_groups_paths, crispr_all_groups, ggsave,
  path = "./plots_18Q3/crispr_grouped_plots_cgc",
  dpi = 300, width = 12, height = 12, units = "in"
)
Data management:
# Collect the saved grouped plots and name each path by its gene symbol.
# Only files with the expected suffix are listed, so stray files in the
# directory cannot end up in the selector.
crispr_all_bygene <- list.files("./plots_18Q3/crispr_grouped_plots_cgc", pattern = "_crispr_grouped\\.png$", full.names = TRUE)
# Gene symbol = file name without directory and suffix (the dots are escaped;
# the previous patterns treated "." as a regex wildcard).
names(crispr_all_bygene) <- sub("_crispr_grouped\\.png$", "", basename(crispr_all_bygene))
# Genes listed in crispr_signif come first (in that table's order), then the rest.
crispr_bygene_order <- c(intersect(crispr_signif$Hugo_Symbol, names(crispr_all_bygene)), setdiff(names(crispr_all_bygene), crispr_signif$Hugo_Symbol))
crispr_all_bygene <- crispr_all_bygene[match(crispr_bygene_order, names(crispr_all_bygene))]
bsselect(crispr_all_bygene, type = "img", selected = "KRAS", live_search = TRUE, show_tick = TRUE, height = 300, frame_height = 275)
Plot code:
# Build one lineage plot per CGC gene with makeCRISPRlinplot() and save each as a PNG.
crispr_data_cgc <- crispr_data %>%
  filter(Hugo_Symbol %in% cgc$Hugo_Symbol)
crispr_all_genes <- unique(crispr_data_cgc$Hugo_Symbol)
crispr_all_lins <- lapply(crispr_all_genes, makeCRISPRlinplot)
crispr_all_lins_paths <- paste0(crispr_all_genes, "_crispr_lineage.png")
# File name and plot are passed to ggsave() as its first two arguments
walk2(
  crispr_all_lins_paths, crispr_all_lins, ggsave,
  path = "./plots_18Q3/crispr_lineage_plots_cgc",
  dpi = 300, width = 12, height = 12, units = "in"
)
Data management:
# Collect the saved lineage plots and name each path by its gene symbol.
# Only files with the expected suffix are listed, so stray files in the
# directory cannot end up in the selector.
crispr_all_bygene <- list.files("./plots_18Q3/crispr_lineage_plots_cgc", pattern = "_crispr_lineage\\.png$", full.names = TRUE)
# Gene symbol = file name without directory and suffix (the dots are escaped;
# the previous patterns treated "." as a regex wildcard).
names(crispr_all_bygene) <- sub("_crispr_lineage\\.png$", "", basename(crispr_all_bygene))
# Genes listed in crispr_signif come first (in that table's order), then the rest.
crispr_bygene_order <- c(intersect(crispr_signif$Hugo_Symbol, names(crispr_all_bygene)), setdiff(names(crispr_all_bygene), crispr_signif$Hugo_Symbol))
crispr_all_bygene <- crispr_all_bygene[match(crispr_bygene_order, names(crispr_all_bygene))]
bsselect(crispr_all_bygene, type = "img", selected = "KRAS", live_search = TRUE, show_tick = TRUE, height = 300, frame_height = 275)
Plot code:
# Build one tissue plot per CGC gene with makeCRISPRtissueplot() and save each as a PNG.
crispr_data_cgc <- crispr_data %>%
  filter(Hugo_Symbol %in% cgc$Hugo_Symbol)
crispr_all_genes <- unique(crispr_data_cgc$Hugo_Symbol)
crispr_all_lins <- lapply(crispr_all_genes, makeCRISPRtissueplot)
crispr_all_lins_paths <- paste0(crispr_all_genes, "_crispr_tissue.png")
# File name and plot are passed to ggsave() as its first two arguments
walk2(
  crispr_all_lins_paths, crispr_all_lins, ggsave,
  path = "./plots_18Q3/crispr_tissue_plots_cgc",
  dpi = 300, width = 12, height = 12, units = "in"
)
Data management:
# Collect the saved tissue plots and name each path by its gene symbol.
# Only files with the expected suffix are listed, so stray files in the
# directory cannot end up in the selector.
crispr_all_bygene <- list.files("./plots_18Q3/crispr_tissue_plots_cgc", pattern = "_crispr_tissue\\.png$", full.names = TRUE)
# Gene symbol = file name without directory and suffix (the dots are escaped;
# the previous patterns treated "." as a regex wildcard).
names(crispr_all_bygene) <- sub("_crispr_tissue\\.png$", "", basename(crispr_all_bygene))
# Genes listed in crispr_signif come first (in that table's order), then the rest.
crispr_bygene_order <- c(intersect(crispr_signif$Hugo_Symbol, names(crispr_all_bygene)), setdiff(names(crispr_all_bygene), crispr_signif$Hugo_Symbol))
crispr_all_bygene <- crispr_all_bygene[match(crispr_bygene_order, names(crispr_all_bygene))]
bsselect(crispr_all_bygene, type = "img", selected = "KRAS", live_search = TRUE, show_tick = TRUE, height = 300, frame_height = 275)
# Number of cell lines per primary tissue / primary disease.
tissue_summ <- crispr_meta %>% group_by(primary_tissue) %>% tally()
disease_summ <- ccl_info %>% group_by(Primary.Disease) %>% tally()
# Single stacked bar of cell line counts, one labelled segment per tissue.
ggplot(data = tissue_summ) +
  aes(x = 0, y = n, fill = primary_tissue, label = primary_tissue) +
  # The counts are precomputed, so draw them directly with geom_col() rather
  # than geom_histogram(stat = "identity"), which bypasses binning and warns
  # about ignored binning parameters.
  geom_col(color = "black") +
  geom_text(position = position_stack(vjust = 0.5)) +
  scale_y_continuous(breaks = seq(0, 500, by = 25), labels = seq(0, 500, by = 25)) +
  coord_cartesian(ylim = c(0, 500)) +
  theme(legend.position = "none", axis.text.x = element_blank(), axis.ticks.x = element_blank(), axis.title.x = element_blank()) +
  labs(y = "Count")
Distribution of scores across cell lines and primary tissues. Boxes summarize all scores within a tissue; the two points drawn for each tissue mark its minimum and maximum score.
# Per-tissue score extremes, drawn as points on top of the boxplots
# (the boxplots' own outliers are hidden).
ceres_summ <- crispr_data %>%
  group_by(primary_tissue) %>%
  summarize(Max = max(Score), Min = min(Score))
score_summ_plot <- ggplot(crispr_data, aes(x = primary_tissue, color = primary_tissue)) +
  geom_boxplot(aes(y = Score), outlier.shape = NA) +
  geom_point(aes(x = primary_tissue, y = Min), data = ceres_summ) +
  geom_point(aes(x = primary_tissue, y = Max), data = ceres_summ) +
  scale_y_continuous(breaks = -4:6, labels = -4:6) +
  theme(
    axis.text.x = element_text(angle = 30, hjust = 1),
    legend.position = "none"
  ) +
  labs(x = "Primary Tissue", y = "CERES Score")
score_summ_plot
# ggsave(filename = "./plots_18Q3/crispr/ceres_score_summ_plot.pdf", plot = score_summ_plot, width = 12, height = 4, device = "pdf")
# Keep only the MAF rows whose gene also appears in the CRISPR data.
maf_df_filt <- filter(maf_df, Hugo_Symbol %in% unique(crispr_data$Hugo_Symbol))
# Disabled: per-status tally of the filtered MAF with each status's share in percent.
# maf_summ_filt <- maf_df_filt %>% group_by(Mutation_Status) %>% tally()
# maf_summ_filt$Percent <- format(round(maf_summ_filt$n / sum(maf_summ_filt$n) * 100, 4), nsmall = 2)
# maf_summ_filt$Percent <- as.numeric(as.character(maf_summ_filt$Percent))
Distribution of mutations across cell lines and primary tissues. Each data point represents a single cell line.
# Count MAF rows per cell line and attach the CRISPR metadata; all.y keeps
# every cell line in crispr_meta, with NA counts where the MAF has no rows.
maf_ccl_annot <- maf_df %>%
  group_by(Broad_ID) %>%
  tally() %>%
  merge(crispr_meta, by = "Broad_ID", all.y = TRUE)
maf_ccl_annot_plot <- ggplot(maf_ccl_annot, aes(x = primary_tissue, y = n, color = primary_tissue)) +
  geom_boxplot(fill = NA, outlier.shape = NA) +
  geom_jitter(position = position_jitter(width = 0.25), size = 1, alpha = 0.7) +
  scale_y_continuous(breaks = seq(0, 9000, by = 1000), labels = seq(0, 9000, by = 1000)) +
  theme(
    axis.text.x = element_blank(),
    axis.ticks.x = element_blank(),
    axis.title.x = element_blank(),
    legend.position = "none"
  ) +
  labs(x = "Primary Tissue", y = "Number of Mutations")
maf_ccl_annot_plot
# ggsave(filename = "./plots_18Q3/crispr/maf_ccl_annot_plot.pdf", plot = maf_ccl_annot_plot, width = 12, height = 4, device = "pdf")
# ggsave(filename = "./plots_18Q3/crispr/score_mut_summ.pdf", plot = grid.draw(rbind(ggplotGrob(maf_ccl_annot_plot), ggplotGrob(score_summ_plot), size = "first")), width = 12, height = 8, device = "pdf")
# Histogram of the number of MAF rows per gene, with the count printed above each bin.
maf_gene_annot <- maf_df_filt %>%
  group_by(Hugo_Symbol) %>%
  tally()
maf_gene_bins <- seq(0, 600, by = 20)
maf_gene_title <- paste0(
  "Distribution of mutational load for ", length(unique(maf_gene_annot$Hugo_Symbol)),
  " genes in the CCLE MAF file across all ", length(unique(crispr_ccl$Broad_ID)),
  " cell lines in the CRISPR screen"
)
ggplot(maf_gene_annot, aes(x = n)) +
  geom_histogram(breaks = maf_gene_bins, fill = "darkslategray3", color = "black", alpha = 0.7) +
  coord_cartesian(ylim = c(0, 13000)) +
  scale_y_continuous(breaks = seq(0, 13500, by = 500), labels = formatC(seq(0, 13500, by = 500), format = "d")) +
  scale_x_continuous(breaks = maf_gene_bins, labels = maf_gene_bins) +
  theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
  stat_bin(aes(label = ..count..), breaks = maf_gene_bins, geom = "text", colour = "black", size = 2.5, vjust = -0.2, angle = 45, hjust = -0.1) +
  labs(x = "Number of Mutations", y = "Frequency", title = maf_gene_title)
# Histogram of every dependency probability in 0.05-wide bins, with bin counts as labels.
# NOTE(review): the name `drop_prob_100` looks like a typo for `dep_prob_100`
# (compare `dep_prob_5` below); it is kept because later code may reference it.
drop_prob_100 <- ggplot(data = crispr_data, aes(x = Dep_Prob)) +
  geom_histogram(breaks = seq(0, 1, by = 0.05), fill = "darkslategray3", color = "black", alpha = 0.7) +
  scale_x_continuous(breaks = seq(0, 1, by = 0.05), labels = seq(0, 1, by = 0.05)) +
  scale_y_continuous(breaks = seq(0, 5500000, by = 500000), labels = formatC(seq(0, 5500000, by = 500000), format = "d")) +
  stat_bin(breaks = seq(0, 1, by = 0.05), geom = "text", colour = "black", size = 3.5, aes(label = ..count..), vjust = -1) +
  labs(title = "Distribution of all dependency probabilities", x = "Probability of Real Dependency", y = "Frequency")
# Zoom on the top of the distribution (Dep_Prob >= 0.95), coloured by primary tissue.
dep_prob_5 <- ggplot(data = filter(crispr_data, Dep_Prob >= 0.95), aes(x = Dep_Prob)) +
  geom_histogram(mapping = aes(fill = primary_tissue), breaks = seq(0.95, 1, by = 0.005), color = "black") +
  scale_y_continuous(breaks = seq(0, 60000, by = 10000), labels = formatC(seq(0, 60000, by = 10000), format = "d")) +
  theme(legend.position = "bottom") +
  guides(fill = guide_legend(nrow = 4, byrow = FALSE)) +
  stat_bin(breaks = seq(0.95, 1, by = 0.005), geom = "text", color = "black", size = 3.5, aes(label = ..count..), vjust = -1) +
  # The title now states the same threshold the filter applies (>=, not >).
  labs(title = "Distribution of all dependency probabilities >= 0.95", x = "Probability of Real Dependency", y = "Frequency", fill = "Primary Tissue")
# Genes shown in the three-panel figure; the vector order is also the facet order.
sig_genes <- c("KRAS", "TP53", "NRAS", "BRAF", "PIK3CA", "PTEN", "CTNNB1")
crispr_sig_genes <- crispr_data %>%
  filter(Hugo_Symbol %in% sig_genes)
crispr_sig_genes$Hugo_Symbol <- factor(crispr_sig_genes$Hugo_Symbol, levels = sig_genes)
# Colour vector named by mutation status, for scale_color_manual()
crispr_color <- setNames(as.character(crispr_sig_genes$Color), crispr_sig_genes$Mutation_Status)
# Mutation status
# Score by mutation status, one facet row per gene, dashed reference line at 0.
plot_mut <- ggplot(crispr_sig_genes, aes(x = Mutation_Status, y = Score, color = Mutation_Status)) +
  geom_boxplot(outlier.shape = NA) +
  geom_jitter(position = position_jitter(width = 0.05), size = 0.7, alpha = 0.3) +
  facet_wrap(~ Hugo_Symbol, nrow = 7, scales = "free_y") +
  scale_color_manual(values = crispr_color) +
  geom_hline(yintercept = 0, linetype = 2, lwd = 0.3) +
  theme_light() +
  theme(legend.position = "none") +
  labs(
    title = "Mutation Status",
    x = "Mutation Status",
    y = "CERES Score"
  )
# plot_mut
# Copy number
# Score against copy number with a linear fit and Pearson correlation per
# mutation status, one facet row per gene.
plot_cn <- ggplot(crispr_sig_genes, aes(x = Copy_Number, y = Score, color = Mutation_Status)) +
  geom_point(size = 0.5, alpha = 0.5) +
  geom_smooth(method = "lm", size = 0.5) +
  scale_color_manual(values = crispr_color) +
  facet_wrap(~ Hugo_Symbol, nrow = 7, scales = "free") +
  stat_cor(method = "pearson", label.x.npc = "right", label.y.npc = "top", show.legend = FALSE) +
  theme_light() +
  theme(legend.position = "none", axis.title.y = element_blank()) +
  labs(
    title = "Copy Number",
    x = "Theoretical Copy number",
    y = "CERES Score",
    color = "Mutation Status"
  )
# plot_cn
# Gene expression
# Score against log2 RPKM with a linear fit and Pearson correlation per
# mutation status, one facet row per gene.
plot_ge <- ggplot(crispr_sig_genes, aes(x = RPKM_log2, y = Score, color = Mutation_Status)) +
  geom_point(size = 0.5, alpha = 0.5) +
  geom_smooth(method = "lm", size = 0.5) +
  facet_wrap(~ Hugo_Symbol, nrow = 7, scales = "free_y") +
  scale_color_manual(values = crispr_color) +
  stat_cor(method = "pearson", label.x.npc = "left", label.y.npc = "top", show.legend = FALSE) +
  theme_light() +
  theme(legend.position = "none", axis.title.y = element_blank()) +
  labs(
    title = "Gene Expression",
    x = "Gene Expression [log2(RPKM)]",
    y = "CERES Score",
    color = "Mutation Status"
  )
# plot_ge
# ggsave(filename = "./plots_18Q3/crispr/omics_sig_genes.pdf", plot = grid.draw(cbind(ggplotGrob(plot_mut), ggplotGrob(plot_ge), ggplotGrob(plot_cn), size = "last")), width = 15, height = 18, device = "pdf")
DepMap cell line metadata:
# From figshare
# Comma-separated sample sheet; empty fields are read as NA.
shrna_meta <- read.csv("./data_munging/sample_info_18Q3_shrna.csv", na.strings = c("", NA))
names(shrna_meta)[1] <- "CCLE_Name"
The Achilles shRNA DEMETER score data were pulled from the CTD2 Data Portal (Tsherniak et al. 2017).
# Gene x cell line score matrix; the first column holds the gene labels.
shrna <- read.table("./data_munging/D2_combined_gene_dep_scores.csv.gz", sep = ",", header = TRUE, check.names = FALSE)
colnames(shrna)[1] <- "Hugo_Symbol"
# Remove Entrez gene IDs from gene names (everything from the first space on)
shrna$Hugo_Symbol <- sub(" .*$", "", shrna$Hugo_Symbol)
# Melt shRNA dataset for merging: one row per gene/cell line pair, rows with NA dropped
shrna_melt <- melt(
  shrna,
  id.vars = "Hugo_Symbol",
  measure.vars = colnames(shrna)[-1],
  variable.name = "CCLE_Name",
  value.name = "Score"
) %>%
  drop_na()
Merge annotation data:
# Merge cell line metadata (left merges: every score row is kept)
shrna_melt <- shrna_melt %>%
  merge(ccl_info, by = "CCLE_Name", all.x = TRUE) %>%
  merge(shrna_meta, by = "CCLE_Name", all.x = TRUE)
# Merge mutation annotations; rows whose Mutation_Status is missing after the
# merge are labelled "Other", and the gene symbol becomes a factor
shrna_muts <- shrna_melt %>%
  merge(maf_df, by = c("Hugo_Symbol", "CCLE_Name", "Broad_ID"), all.x = TRUE) %>%
  mutate(
    Mutation_Status = if_else(is.na(Mutation_Status), "Other", Mutation_Status),
    Hugo_Symbol = factor(Hugo_Symbol)
  )
# Summarize number of mutant and Other cell lines per gene
shrna_muts_summ <- summarize(
  group_by(shrna_muts, Hugo_Symbol),
  N_Other = sum(Mutation_Status == "Other"),
  N_Mutant = sum(Mutation_Status == "Mutant")
)
# Attach the per-gene counts to the full dataset, so each row carries both its
# own annotations and the N_Other / N_Mutant totals for its gene
shrna_data <- merge(x = shrna_muts_summ, y = shrna_muts, by = "Hugo_Symbol")
# Add Color column: one color name per mutation status, stored as a factor
shrna_data$Color <- factor(
  ifelse(shrna_data$Mutation_Status == "Other", "cyan3", "darkorchid")
)
# Cell line lineages
shrna_data <- merge(
  shrna_data,
  ccl_converter,
  by = c("CCLE_Name", "Broad_ID"),
  all.x = TRUE
)
# Put the lineage levels in reverse alphabetical order.
# NOTE: this must be done by rebuilding the factor with an explicit `levels`
# argument. Assigning to `levels(x)` does not reorder anything -- it renames
# the existing levels position by position, which would silently swap the
# lineage label attached to every row.
shrna_data$lineage_name <- factor(
  shrna_data$lineage_name,
  levels = sort(unique(as.character(shrna_data$lineage_name)), decreasing = TRUE)
)
# Copy number (left merge on gene and cell line ID)
shrna_data <- shrna_data %>%
  merge(cn_melt, by = c("Hugo_Symbol", "Broad_ID"), all.x = TRUE)
# Gene expression (RPKM): keep only genes present in shrna_data before merging
ge_filt <- ge_melt %>%
  filter(Hugo_Symbol %in% unique(shrna_data$Hugo_Symbol))
shrna_data <- shrna_data %>%
  merge(ge_filt, by = c("Hugo_Symbol", "Broad_ID", "CCLE_Name"), all.x = TRUE)
# Cache the annotated table on disk (xz-compressed) and read it back
saveRDS(
  object = shrna_data,
  file = "./../crispr_lineages_giant_files/shrna_data_18Q3.rds",
  compress = "xz"
)
shrna_data <- readRDS(file = "./../crispr_lineages_giant_files/shrna_data_18Q3.rds")
# One-column data frame holding the Broad_ID of every row in shrna_data
shrna_ccl <- data.frame(Broad_ID = shrna_data[["Broad_ID"]])
# Wilcoxon test of Score between mutation-status groups, run separately for
# each gene with Benjamini-Hochberg adjustment, then passed through the
# file's adj_signif() helper
shrna_signif <- adj_signif(
  compare_means(
    formula = Score ~ Mutation_Status,
    data = shrna_data,
    method = "wilcox.test",
    group.by = c("Hugo_Symbol"),
    p.adjust.method = "BH"
  )
)
# Sort ascending by the `p` column
shrna_signif <- shrna_signif[order(shrna_signif[["p"]]), ]
# Cache the test results and read them back
saveRDS(object = shrna_signif, file = "./data_munging/rds/shrna_signif.rds")
shrna_signif <- readRDS(file = "./data_munging/rds/shrna_signif.rds")
# Scrollable HTML table of the per-gene Wilcoxon results. Rows are selected on
# the `p` column (not `p.adj`) with a 0.01 cutoff; the caption states that same
# cutoff so the description matches the rows actually shown.
knitr::kable(
  filter(shrna_signif, p < 0.01)[, c("Hugo_Symbol", "p", "p.adj", "p.format", "p.signif", "p.signif.adj")],
  caption = "shRNA Screen: Wilcoxon Test Results Comparing Mutant and Other Cell Lines, p < 0.01 (Benjamini-Hochberg-corrected p-values: * p <= 0.05, ** p <= 0.01, *** p <= 0.001, **** p <= 0.0001)"
) %>%
  kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive")) %>%
  scroll_box(width = "900px", height = "450px")
# Same Wilcoxon comparison, run within each gene/lineage combination and
# passed through adj_signif(); lineage levels are then reordered by the mean
# of p.adj within each lineage
shrna_signif_lineage <- adj_signif(
  compare_means(
    formula = Score ~ Mutation_Status,
    data = shrna_data,
    method = "wilcox.test",
    group.by = c("Hugo_Symbol", "lineage_name"),
    p.adjust.method = "BH"
  )
) %>%
  mutate(lineage_name = reorder(lineage_name, p.adj, mean))
# Cache the per-lineage results and read them back
saveRDS(
  object = shrna_signif_lineage,
  file = "./../crispr_lineages_giant_files/shrna_signif_lineage.rds"
)
shrna_signif_lineage <- readRDS(file = "./../crispr_lineages_giant_files/shrna_signif_lineage.rds")
Barretina, J., Caponigro, G., Stransky, N., Venkatesan, K., Margolin, A. A., Kim, S., … Garraway, L. A. (2012). The Cancer Cell Line Encyclopedia enables predictive modelling of anticancer drug sensitivity. Nature, 483(7391), 603–607. https://doi.org/10.1038/nature11003
Broad Institute Cancer Dependency Map; Cancer Data Science (2018): Cancer Dependency Map, CRISPR Avana dataset 18Q3 (Avana_public_18Q3). figshare. Fileset. doi:10.6084/m9.figshare.6931364.v1
The Cancer Cell Line Encyclopedia Consortium, & The Genomics of Drug Sensitivity in Cancer Consortium. (2015). Pharmacogenomic agreement between two cancer cell line data sets. Nature, 528(7580), 84–87. https://doi.org/10.1038/nature15736
Data Science, Cancer (2018): DEMETER2 data. figshare. Fileset. doi:10.6084/m9.figshare.6025238.v2
Doench, J. G., Fusi, N., Sullender, M., Hegde, M., Vaimberg, E. W., Donovan, K. F., … Root, D. E. (2016). Optimized sgRNA design to maximize activity and minimize off-target effects of CRISPR-Cas9. Nature Biotechnology, 34(2), 184–191. https://doi.org/10.1038/nbt.3437
Meyers, R. M., Bryan, J. G., McFarland, J. M., Weir, B. A., Sizemore, A. E., Xu, H., … Tsherniak, A. (2017). Computational correction of copy-number effect improves specificity of CRISPR-Cas9 essentiality screens in cancer cells. Nature Genetics, 49(12), 1779–1784. https://doi.org/10.1038/ng.3984
McFarland, J. M., Ho, Z. V., Kugener, G., Dempster, J. M., Montgomery, P. G., Bryan, J. G., … Tsherniak, A. (2018). Improved estimation of cancer dependencies from large-scale RNAi screens using model-based normalization and data integration. https://doi.org/10.1101/305656
# Record the R version, platform and package versions used for this analysis
print(utils::sessionInfo())
## R version 3.5.0 (2018-04-23)
## Platform: x86_64-apple-darwin15.6.0 (64-bit)
## Running under: macOS High Sierra 10.13.6
##
## Matrix products: default
## BLAS: /Library/Frameworks/R.framework/Versions/3.5/Resources/lib/libRblas.0.dylib
## LAPACK: /Library/Frameworks/R.framework/Versions/3.5/Resources/lib/libRlapack.dylib
##
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
##
## attached base packages:
## [1] grid parallel stats graphics grDevices utils datasets
## [8] methods base
##
## other attached packages:
## [1] bindrcpp_0.2.2 circlize_0.4.4 ComplexHeatmap_1.18.1
## [4] bsselectR_0.1.0 caret_6.0-80 lattice_0.20-35
## [7] reshape2_1.4.3 devtools_1.13.6 glmnet_2.0-16
## [10] foreach_1.4.4 Matrix_1.2-14 broom_0.5.0
## [13] gridExtra_2.3 kableExtra_0.9.0 matrixStats_0.54.0
## [16] forcats_0.3.0 stringr_1.3.1 dplyr_0.7.6
## [19] purrr_0.2.5 readr_1.1.1 tidyr_0.8.1
## [22] tibble_1.4.2 tidyverse_1.2.1 plyr_1.8.4
## [25] CePa_0.6 data.table_1.11.4 rowr_1.1.3
## [28] ggsignif_0.4.0 ggpubr_0.1.7.999 magrittr_1.5
## [31] ggplot2_3.0.0 NMF_0.21.0 Biobase_2.40.0
## [34] BiocGenerics_0.26.0 cluster_2.0.7-1 rngtools_1.3.1
## [37] pkgmaker_0.27 registry_0.5
##
## loaded via a namespace (and not attached):
## [1] colorspace_1.3-2 rjson_0.2.20 class_7.3-14
## [4] rprojroot_1.3-2 GlobalOptions_0.1.0 pls_2.6-0
## [7] rstudioapi_0.7 DRR_0.0.3 prodlim_2018.04.18
## [10] lubridate_1.7.4 xml2_1.2.0 splines_3.5.0
## [13] codetools_0.2-15 doParallel_1.0.11 robustbase_0.93-1.1
## [16] knitr_1.20 RcppRoll_0.3.0 jsonlite_1.5
## [19] gridBase_0.4-7 ddalpha_1.3.4 kernlab_0.9-26
## [22] sfsmisc_1.1-2 graph_1.58.0 compiler_3.5.0
## [25] httr_1.3.1 backports_1.1.2 assertthat_0.2.0
## [28] lazyeval_0.2.1 cli_1.0.0 htmltools_0.3.6
## [31] tools_3.5.0 igraph_1.2.1 gtable_0.2.0
## [34] glue_1.3.0 Rcpp_0.12.18 cellranger_1.1.0
## [37] nlme_3.1-137 iterators_1.0.10 timeDate_3043.102
## [40] gower_0.1.2 rvest_0.3.2 DEoptimR_1.0-8
## [43] MASS_7.3-50 scales_0.5.0 ipred_0.9-6
## [46] hms_0.4.2 RColorBrewer_1.1-2 yaml_2.1.19
## [49] memoise_1.1.0 rpart_4.1-13 stringi_1.2.4
## [52] highr_0.7 bibtex_0.4.2 shape_1.4.4
## [55] lava_1.6.2 geometry_0.3-6 rlang_0.2.1
## [58] pkgconfig_2.0.1 evaluate_0.11 bindr_0.1.1
## [61] labeling_0.3 htmlwidgets_1.2 recipes_0.1.3
## [64] CVST_0.2-2 tidyselect_0.2.4 R6_2.2.2
## [67] dimRed_0.1.0 pillar_1.3.0 haven_1.1.2
## [70] withr_2.1.2 nnet_7.3-12 survival_2.42-6
## [73] abind_1.4-5 modelr_0.1.2 crayon_1.3.4
## [76] rmarkdown_1.10 GetoptLong_0.1.7 readxl_1.1.0
## [79] Rgraphviz_2.24.0 ModelMetrics_1.1.0 digest_0.6.15
## [82] xtable_1.8-2 stats4_3.5.0 munsell_0.5.0
## [85] viridisLite_0.3.0 magic_1.5-8